diff --git a/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch b/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch deleted file mode 100644 index dfad78c3f575689a322484d1c66ab08bedeeb50b..0000000000000000000000000000000000000000 --- a/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch +++ /dev/null @@ -1,184 +0,0 @@ -From 476ec6be3f75c70c50bd1552c624abca098ddba2 Mon Sep 17 00:00:00 2001 -Date: Wed, 18 Mar 2020 10:25:06 +0000 -Subject: [PATCH] 8209375: ZGC: Use dynamic base address for mark stack space - -Summary: : -LLT: jdk11u/test/hotspot/jtreg/vmTestbase/gc/gctests/SoftReference/soft004/soft004.java -Bug url: https://bugs.openjdk.java.net/browse/JDK-8209375 ---- - src/hotspot/share/gc/z/zGlobals.hpp | 7 +--- - src/hotspot/share/gc/z/zMarkStack.cpp | 74 +++++++++++++++-------------------- - src/hotspot/share/gc/z/zMarkStack.hpp | 1 + - src/hotspot/share/gc/z/z_globals.hpp | 6 +-- - 4 files changed, 38 insertions(+), 50 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zGlobals.hpp b/src/hotspot/share/gc/z/zGlobals.hpp -index 080ea5c0e..0f9e9dcb4 100644 ---- a/src/hotspot/share/gc/z/zGlobals.hpp -+++ b/src/hotspot/share/gc/z/zGlobals.hpp -@@ -117,11 +117,8 @@ extern uintptr_t ZAddressWeakBadMask; - // Marked state - extern uintptr_t ZAddressMetadataMarked; - --// Address space for mark stack allocations --const size_t ZMarkStackSpaceSizeShift = 40; // 1TB --const size_t ZMarkStackSpaceSize = (size_t)1 << ZMarkStackSpaceSizeShift; --const uintptr_t ZMarkStackSpaceStart = ZAddressSpaceEnd + ZMarkStackSpaceSize; --const uintptr_t ZMarkStackSpaceEnd = ZMarkStackSpaceStart + ZMarkStackSpaceSize; -+// Mark stack space -+extern uintptr_t ZMarkStackSpaceStart; - const size_t ZMarkStackSpaceExpandSize = (size_t)1 << 25; // 32M - - // Mark stack and magazine sizes -diff --git a/src/hotspot/share/gc/z/zMarkStack.cpp b/src/hotspot/share/gc/z/zMarkStack.cpp -index 52fe51ece..9cc768956 100644 ---- a/src/hotspot/share/gc/z/zMarkStack.cpp -+++ b/src/hotspot/share/gc/z/zMarkStack.cpp -@@ -28,58 +28,44 @@ - #include "gc/z/zMarkStack.inline.hpp" - #include "logging/log.hpp" - #include "runtime/atomic.hpp" -+#include "runtime/os.hpp" - #include "utilities/debug.hpp" - --#include --#include -+uintptr_t ZMarkStackSpaceStart; - - ZMarkStackSpace::ZMarkStackSpace() : - _expand_lock(), -+ _start(0), - _top(0), - _end(0) { -- assert(ZMarkStacksMax >= ZMarkStackSpaceExpandSize, "ZMarkStacksMax too small"); -- assert(ZMarkStacksMax <= ZMarkStackSpaceSize, "ZMarkStacksMax too large"); -- -+ assert(ZMarkStackSpaceLimit >= ZMarkStackSpaceExpandSize, "ZMarkStackSpaceLimit too small"); - // Reserve address space -- const void* res = mmap((void*)ZMarkStackSpaceStart, ZMarkStackSpaceSize, -- PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0); -- if (res != (void*)ZMarkStackSpaceStart) { -- log_error(gc, marking)("Failed to reserve address space for marking stacks"); -+ const size_t size = ZMarkStackSpaceLimit; -+ const size_t alignment = (size_t)os::vm_allocation_granularity(); -+ const uintptr_t addr = (uintptr_t)os::reserve_memory(size, NULL, alignment, mtGC); -+ if (addr == 0) { -+ log_error(gc, marking)("Failed to reserve address space for mark stacks"); - return; - } - - // Successfully initialized -- _top = _end = ZMarkStackSpaceStart; --} -+ _start = _top = _end = addr; - --bool ZMarkStackSpace::is_initialized() const { -- return _top != 0; -+ // Register mark stack space start -+ ZMarkStackSpaceStart = _start; - } - --bool ZMarkStackSpace::expand() { -- const size_t max = 
ZMarkStackSpaceStart + ZMarkStacksMax; -- if (_end + ZMarkStackSpaceExpandSize > max) { -- // Expansion limit reached -- return false; -- } -- -- void* const res = mmap((void*)_end, ZMarkStackSpaceExpandSize, -- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); -- if (res == MAP_FAILED) { -- ZErrno err; -- log_error(gc, marking)("Failed to map memory for marking stacks (%s)", err.to_string()); -- return false; -- } -- -- return true; -+bool ZMarkStackSpace::is_initialized() const { -+ return _start != 0; - } - - uintptr_t ZMarkStackSpace::alloc_space(size_t size) { -- uintptr_t top = _top; -+ uintptr_t top = Atomic::load(&_top); - - for (;;) { -+ const uintptr_t end = Atomic::load(&_end); - const uintptr_t new_top = top + size; -- if (new_top > _end) { -+ if (new_top > end) { - // Not enough space left - return 0; - } -@@ -104,24 +90,28 @@ uintptr_t ZMarkStackSpace::expand_and_alloc_space(size_t size) { - return addr; - } - -- // Expand stack space -- if (!expand()) { -- // We currently can't handle the situation where we -- // are running out of mark stack space. -- fatal("Mark stack overflow (allocated " SIZE_FORMAT "M, size " SIZE_FORMAT "M, max " SIZE_FORMAT "M)," -- " use -XX:ZMarkStacksMax=? to increase this limit", -- (_end - ZMarkStackSpaceStart) / M, size / M, ZMarkStacksMax / M); -- return 0; -+ // Check expansion limit -+ const size_t expand_size = ZMarkStackSpaceExpandSize; -+ const size_t old_size = _end - _start; -+ const size_t new_size = old_size + expand_size; -+ if (new_size > ZMarkStackSpaceLimit) { -+ // Expansion limit reached. This is a fatal error since we -+ // currently can't recover from running out of mark stack space. -+ fatal("Mark stack space exhausted. Use -XX:ZMarkStackSpaceLimit= to increase the " -+ "maximum number of bytes allocated for mark stacks. Current limit is " SIZE_FORMAT "M.", -+ ZMarkStackSpaceLimit / M); - } - - log_debug(gc, marking)("Expanding mark stack space: " SIZE_FORMAT "M->" SIZE_FORMAT "M", -- (_end - ZMarkStackSpaceStart) / M, -- (_end - ZMarkStackSpaceStart + ZMarkStackSpaceExpandSize) / M); -+ old_size / M, new_size / M); -+ -+ // Expand -+ os::commit_memory_or_exit((char*)_end, expand_size, false /* executable */, "Mark stack space"); - - // Increment top before end to make sure another - // thread can't steal out newly expanded space. 
- addr = Atomic::add(size, &_top) - size; -- _end += ZMarkStackSpaceExpandSize; -+ Atomic::add(expand_size, &_end); - - return addr; - } -diff --git a/src/hotspot/share/gc/z/zMarkStack.hpp b/src/hotspot/share/gc/z/zMarkStack.hpp -index b68b9faa3..12f3e4eca 100644 ---- a/src/hotspot/share/gc/z/zMarkStack.hpp -+++ b/src/hotspot/share/gc/z/zMarkStack.hpp -@@ -76,6 +76,7 @@ typedef ZStackList ZMarkStackMagazineList; - class ZMarkStackSpace { - private: - ZLock _expand_lock; -+ uintptr_t _start; - volatile uintptr_t _top; - volatile uintptr_t _end; - -diff --git a/src/hotspot/share/gc/z/z_globals.hpp b/src/hotspot/share/gc/z/z_globals.hpp -index 9e0f8985b..8cee59be7 100644 ---- a/src/hotspot/share/gc/z/z_globals.hpp -+++ b/src/hotspot/share/gc/z/z_globals.hpp -@@ -53,9 +53,9 @@ - "Allow Java threads to stall and wait for GC to complete " \ - "instead of immediately throwing an OutOfMemoryError") \ - \ -- product(size_t, ZMarkStacksMax, NOT_LP64(512*M) LP64_ONLY(8*G), \ -- "Maximum number of bytes allocated for marking stacks") \ -- range(32*M, NOT_LP64(512*M) LP64_ONLY(1024*G)) \ -+ product(size_t, ZMarkStackSpaceLimit, 8*G, \ -+ "Maximum number of bytes allocated for mark stacks") \ -+ range(32*M, 1024*G) \ - \ - product(uint, ZCollectionInterval, 0, \ - "Force GC at a fixed time interval (in seconds)") \ --- -2.12.3 - diff --git a/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch b/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch deleted file mode 100644 index 6ba8ad526b9d5d8380afa4977bfc0d9432eb27b8..0000000000000000000000000000000000000000 --- a/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 7ca249ae82c6b6c60c524781806f9d12ef3f8f98 Mon Sep 17 00:00:00 2001 -Date: Mon, 16 Mar 2020 16:24:43 +0800 -Subject: [PATCH] 8209894: ZGC: Cap number of GC workers based on heap size - -Summary: : -LLT: jdk11u/test/hotspot/jtreg/vmTestbase/nsk/jdi/ObjectReference/disableCollection/disablecollection002/TestDescription.java -Bug url: https://bugs.openjdk.java.net/browse/JDK-8209894 ---- - src/hotspot/share/gc/z/zWorkers.cpp | 23 ++++++++++++++++++----- - src/hotspot/share/gc/z/zWorkers.hpp | 4 +--- - 2 files changed, 19 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zWorkers.cpp b/src/hotspot/share/gc/z/zWorkers.cpp -index 0686ec7af..6a0c2561d 100644 ---- a/src/hotspot/share/gc/z/zWorkers.cpp -+++ b/src/hotspot/share/gc/z/zWorkers.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -22,14 +22,27 @@ - */ - - #include "precompiled.hpp" -+#include "gc/z/zGlobals.hpp" - #include "gc/z/zTask.hpp" - #include "gc/z/zWorkers.inline.hpp" - #include "runtime/os.hpp" - #include "runtime/mutexLocker.hpp" - #include "runtime/safepoint.hpp" - --uint ZWorkers::calculate_ncpus(double share_in_percent) { -- return ceil(os::initial_active_processor_count() * share_in_percent / 100.0); -+static uint calculate_nworkers_based_on_ncpus(double cpu_share_in_percent) { -+ return ceil(os::initial_active_processor_count() * cpu_share_in_percent / 100.0); -+} -+ -+static uint calculate_nworkers_based_on_heap_size(double reserve_share_in_percent) { -+ const int nworkers = ((MaxHeapSize * (reserve_share_in_percent / 100.0)) - ZPageSizeMedium) / ZPageSizeSmall; -+ return MAX2(nworkers, 1); -+} -+ -+static uint calculate_nworkers(double cpu_share_in_percent) { -+ // Cap number of workers so that we never use more than 10% of the max heap -+ // for the reserve. This is useful when using small heaps on large machines. -+ return MIN2(calculate_nworkers_based_on_ncpus(cpu_share_in_percent), -+ calculate_nworkers_based_on_heap_size(10.0)); - } - - uint ZWorkers::calculate_nparallel() { -@@ -38,7 +51,7 @@ uint ZWorkers::calculate_nparallel() { - // close to the number of processors tends to lead to over-provisioning and - // scheduling latency issues. Using 60% of the active processors appears to - // be a fairly good balance. -- return calculate_ncpus(60.0); -+ return calculate_nworkers(60.0); - } - - uint ZWorkers::calculate_nconcurrent() { -@@ -48,7 +61,7 @@ uint ZWorkers::calculate_nconcurrent() { - // throughput, while using too few threads will prolong the GC-cycle and - // we then risk being out-run by the application. Using 12.5% of the active - // processors appears to be a fairly good balance. 
-- return calculate_ncpus(12.5); -+ return calculate_nworkers(12.5); - } - - class ZWorkersWarmupTask : public ZTask { -diff --git a/src/hotspot/share/gc/z/zWorkers.hpp b/src/hotspot/share/gc/z/zWorkers.hpp -index 36a3c61fd..6ce09c447 100644 ---- a/src/hotspot/share/gc/z/zWorkers.hpp -+++ b/src/hotspot/share/gc/z/zWorkers.hpp -@@ -34,8 +34,6 @@ private: - bool _boost; - WorkGang _workers; - -- static uint calculate_ncpus(double share_in_percent); -- - void run(ZTask* task, uint nworkers); - - public: --- -2.12.3 - diff --git a/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch b/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch deleted file mode 100644 index b88b4bfecef68c704a43f77a4050494101550597..0000000000000000000000000000000000000000 --- a/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch +++ /dev/null @@ -1,906 +0,0 @@ -diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index 687896251..a39640526 100644 ---- a/make/hotspot/gensrc/GensrcAdlc.gmk -+++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -140,6 +140,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) - $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ - ))) - -+ ifeq ($(call check-jvm-feature, zgc), true) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \ -+ ))) -+ endif -+ - ifeq ($(call check-jvm-feature, shenandoahgc), true) - AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ - $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 29f81face..ab578476a 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1128,13 +1128,6 @@ definitions %{ - int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); - %} - --source_hpp %{ -- --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- - //----------SOURCE BLOCK------------------------------------------------------- - // This is a block of C++ code which provides values, functions, and - // definitions necessary in the rest of the architecture description -@@ -18110,243 +18103,6 @@ instruct vpopcount2I(vecD dst, vecD src) %{ - ins_pipe(pipe_class_default); - %} - --source %{ -- --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -- __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(tmp, tmp, ref); -- __ cbnz(tmp, *stub->entry()); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -- __ b(*stub->entry()); -- __ bind(*stub->continuation()); --} -- --%} -- --// Load Pointer --instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -- effect(TEMP dst, KILL cr); -- -- ins_cost(4 * INSN_COST); -- -- format %{ "ldr $dst, $mem" %} -- -- ins_encode %{ -- const Address ref_addr = 
mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -- __ ldr($dst$$Register, ref_addr); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --// Load Weak Pointer --instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -- effect(TEMP dst, KILL cr); -- -- ins_cost(4 * INSN_COST); -- -- format %{ "ldr $dst, $mem" %} -- -- ins_encode %{ -- const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -- __ ldr($dst$$Register, ref_addr); -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --// Load Pointer Volatile --instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP dst, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "ldar $dst, $mem\t" %} -- -- ins_encode %{ -- __ ldar($dst$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval\n\t" -- "cset $res, EQ" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, rscratch2); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval\n\t" -- "cset $res, EQ" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && 
$mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, rscratch2); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, $res$$Register); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, $res$$Register); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "atomic_xchg $prev, $newv, [$mem]" %} -- -- ins_encode %{ -- __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -- -- ins_encode %{ -- __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- ins_pipe(pipe_serial); --%} - - //----------PEEPHOLE RULES----------------------------------------------------- - // These must follow all instruction definitions as they use the names -diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad -new file mode 100644 -index 000000000..50cc6f924 ---- /dev/null -+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad -@@ -0,0 +1,268 @@ -+// -+// Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+ -+source_hpp %{ -+ -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+ -+%} -+ -+source %{ -+ -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ cbnz(tmp, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ b(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Pointer Volatile -+instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "ldar $dst, $mem\t" %} -+ -+ ins_encode %{ -+ __ ldar($dst$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak 
*/, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ ins_pipe(pipe_serial); -+%} -+ -diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad -new file mode 100644 -index 000000000..38c2e926b ---- /dev/null -+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad -@@ -0,0 +1,168 @@ -+// -+// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+ -+source_hpp %{ -+ -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+ -+%} -+ -+source %{ -+ -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::notZero, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ jmp(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); -+ -+ ins_cost(125); -+ -+ format %{ "movq $dst, $mem" %} -+ -+ ins_encode %{ -+ __ movptr($dst$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(ialu_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); -+ -+ ins_cost(125); -+ -+ format %{ "movq $dst, $mem" %} -+ -+ ins_encode %{ -+ __ movptr($dst$$Register, $mem$$Address); -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(ialu_reg_mem); -+%} -+ -+instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem" %} -+ -+ ins_encode %{ -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -+instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, KILL oldval, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem\n\t" -+ "sete $res\n\t" -+ "movzbl $res, $res" %} -+ -+ ins_encode %{ -+ if 
(barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ __ cmpptr($tmp$$Register, $oldval$$Register); -+ } -+ __ setb(Assembler::equal, $res$$Register); -+ __ movzbl($res$$Register, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -+instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -+ match(Set newval (GetAndSetP mem newval)); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr); -+ -+ format %{ "xchgq $newval, $mem" %} -+ -+ ins_encode %{ -+ __ xchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 95a8538f3..ede4d8864 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -538,19 +538,6 @@ reg_class int_rdi_reg(RDI); - - %} - --source_hpp %{ -- --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- --source_hpp %{ --#if INCLUDE_ZGC --#include "gc/z/zBarrierSetAssembler.hpp" --#endif --%} -- - //----------SOURCE BLOCK------------------------------------------------------- - // This is a block of C++ code which provides values, functions, and - // definitions necessary in the rest of the architecture description -@@ -1882,19 +1869,6 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); - } - --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -- __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::notZero, *stub->entry()); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -- __ jmp(*stub->entry()); -- __ bind(*stub->continuation()); --} -- - %} - - //----------ENCODING BLOCK----------------------------------------------------- -@@ -12845,131 +12819,6 @@ instruct RethrowException() - ins_pipe(pipe_jmp); - %} - --// --// Execute ZGC load barrier (strong) slow path --// -- --// Load Pointer --instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) --%{ -- predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -- match(Set dst (LoadP mem)); -- effect(KILL cr, TEMP dst); -- -- ins_cost(125); -- -- format %{ "movq $dst, $mem" %} -- -- ins_encode %{ -- __ movptr($dst$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(ialu_reg_mem); --%} 
-- --// Load Weak Pointer --instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) --%{ -- predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -- match(Set dst (LoadP mem)); -- effect(KILL cr, TEMP dst); -- -- ins_cost(125); -- -- format %{ "movq $dst, $mem" %} -- ins_encode %{ -- __ movptr($dst$$Register, $mem$$Address); -- z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); -- %} -- -- ins_pipe(ialu_reg_mem); --%} -- --instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -- match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP tmp); -- -- format %{ "lock\n\t" -- "cmpxchgq $newval, $mem" %} -- -- ins_encode %{ -- if (barrier_data() != ZLoadBarrierElided) { -- __ movptr($tmp$$Register, $oldval$$Register); -- } -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::zero, good); -- z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -- __ movptr($oldval$$Register, $tmp$$Register); -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- -- --instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, KILL oldval, TEMP tmp); -- -- format %{ "lock\n\t" -- "cmpxchgq $newval, $mem\n\t" -- "sete $res\n\t" -- "movzbl $res, $res" %} -- -- ins_encode %{ -- if (barrier_data() != ZLoadBarrierElided) { -- __ movptr($tmp$$Register, $oldval$$Register); -- } -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::zero, good); -- z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -- __ movptr($oldval$$Register, $tmp$$Register); -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- __ bind(good); -- __ cmpptr($tmp$$Register, $oldval$$Register); -- } -- __ setb(Assembler::equal, $res$$Register); -- __ movzbl($res$$Register, $res$$Register); -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- --instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -- match(Set newval (GetAndSetP mem newval)); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr); -- -- format %{ "xchgq $newval, $mem" %} -- -- ins_encode %{ -- __ xchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- - // ============================================================================ - // This name is KNOWN by the ADLC and cannot be changed. 
- // The ADLC forces a 'TypeRawPtr::BOTTOM' output type --- -2.19.0 - diff --git a/8231441-1-AArch64-Initial-SVE-backend-support.patch b/8231441-1-AArch64-Initial-SVE-backend-support.patch index 183a8c91e2659a6adef92648234ba4b1a9402dfd..cb002807c51cc28000a84c3fcf5f022c1ec9f282 100755 --- a/8231441-1-AArch64-Initial-SVE-backend-support.patch +++ b/8231441-1-AArch64-Initial-SVE-backend-support.patch @@ -308,8 +308,8 @@ index 643e3d564..82e615241 100644 static int cpu_revision() { return _revision; } + static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; - static bool is_zva_enabled() { return 0 <= _zva_length; } - static int zva_length() { + static bool is_hisi_enabled() { + if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03)) { diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java new file mode 100644 index 000000000..dc15ca800 diff --git a/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch b/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch deleted file mode 100644 index a851739d97fdc092a60bde940517b6971e27ed4d..0000000000000000000000000000000000000000 --- a/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch +++ /dev/null @@ -1,101 +0,0 @@ -From d2137837d518a8bdb8e075109e502e78bd2f9fa9 Mon Sep 17 00:00:00 2001 -Date: Wed, 19 Feb 2020 17:36:32 +0800 -Subject: [PATCH] 8233061: ZGC: Enforce memory ordering in segmented bit maps - -Summary: : -LLT: renaissance -Bug url: https://bugs.openjdk.java.net/browse/JDK-8233061 ---- - src/hotspot/share/gc/z/zLiveMap.cpp | 20 +++++++++----------- - src/hotspot/share/gc/z/zLiveMap.inline.hpp | 9 +++++---- - 2 files changed, 14 insertions(+), 15 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zLiveMap.cpp b/src/hotspot/share/gc/z/zLiveMap.cpp -index 7187b6166..c1d79b794 100644 ---- a/src/hotspot/share/gc/z/zLiveMap.cpp -+++ b/src/hotspot/share/gc/z/zLiveMap.cpp -@@ -50,7 +50,9 @@ void ZLiveMap::reset(size_t index) { - - // Multiple threads can enter here, make sure only one of them - // resets the marking information while the others busy wait. -- for (uint32_t seqnum = _seqnum; seqnum != ZGlobalSeqNum; seqnum = _seqnum) { -+ for (uint32_t seqnum = OrderAccess::load_acquire(&_seqnum); -+ seqnum != ZGlobalSeqNum; -+ seqnum = OrderAccess::load_acquire(&_seqnum)) { - if ((seqnum != seqnum_initializing) && - (Atomic::cmpxchg(seqnum_initializing, &_seqnum, seqnum) == seqnum)) { - // Reset marking information -@@ -61,13 +63,13 @@ void ZLiveMap::reset(size_t index) { - segment_live_bits().clear(); - segment_claim_bits().clear(); - -- // Make sure the newly reset marking information is -- // globally visible before updating the page seqnum. -- OrderAccess::storestore(); -- -- // Update seqnum - assert(_seqnum == seqnum_initializing, "Invalid"); -- _seqnum = ZGlobalSeqNum; -+ -+ // Make sure the newly reset marking information is ordered -+ // before the update of the page seqnum, such that when the -+ // up-to-date seqnum is load acquired, the bit maps will not -+ // contain stale information. -+ OrderAccess::release_store(&_seqnum, ZGlobalSeqNum); - break; - } - -@@ -89,10 +91,6 @@ void ZLiveMap::reset_segment(BitMap::idx_t segment) { - if (!claim_segment(segment)) { - // Already claimed, wait for live bit to be set - while (!is_segment_live(segment)) { -- // Busy wait. The loadload barrier is needed to make -- // sure we re-read the live bit every time we loop. 
-- OrderAccess::loadload(); -- - // Mark reset contention - if (!contention) { - // Count contention once -diff --git a/src/hotspot/share/gc/z/zLiveMap.inline.hpp b/src/hotspot/share/gc/z/zLiveMap.inline.hpp -index 1e4d56f41..fb45a892c 100644 ---- a/src/hotspot/share/gc/z/zLiveMap.inline.hpp -+++ b/src/hotspot/share/gc/z/zLiveMap.inline.hpp -@@ -30,6 +30,7 @@ - #include "gc/z/zOop.inline.hpp" - #include "gc/z/zUtils.inline.hpp" - #include "runtime/atomic.hpp" -+#include "runtime/orderAccess.hpp" - #include "utilities/bitMap.inline.hpp" - #include "utilities/debug.hpp" - -@@ -38,7 +39,7 @@ inline void ZLiveMap::reset() { - } - - inline bool ZLiveMap::is_marked() const { -- return _seqnum == ZGlobalSeqNum; -+ return OrderAccess::load_acquire(&_seqnum) == ZGlobalSeqNum; - } - - inline uint32_t ZLiveMap::live_objects() const { -@@ -68,15 +69,15 @@ inline BitMapView ZLiveMap::segment_claim_bits() { - } - - inline bool ZLiveMap::is_segment_live(BitMap::idx_t segment) const { -- return segment_live_bits().at(segment); -+ return segment_live_bits().par_at(segment); - } - - inline bool ZLiveMap::set_segment_live_atomic(BitMap::idx_t segment) { -- return segment_live_bits().par_set_bit(segment); -+ return segment_live_bits().par_set_bit(segment, memory_order_release); - } - - inline bool ZLiveMap::claim_segment(BitMap::idx_t segment) { -- return segment_claim_bits().par_set_bit(segment); -+ return segment_claim_bits().par_set_bit(segment, memory_order_acq_rel); - } - - inline BitMap::idx_t ZLiveMap::first_live_segment() const { --- -2.12.3 - diff --git a/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch b/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch deleted file mode 100644 index 5e1f19e24c3be12d23568387071d29b8bd473c03..0000000000000000000000000000000000000000 --- a/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch +++ /dev/null @@ -1,162 +0,0 @@ -diff --git a/src/hotspot/share/c1/c1_Instruction.cpp b/src/hotspot/share/c1/c1_Instruction.cpp -index ee3be89..62d8b48 100644 ---- a/src/hotspot/share/c1/c1_Instruction.cpp -+++ b/src/hotspot/share/c1/c1_Instruction.cpp -@@ -29,6 +29,7 @@ - #include "c1/c1_ValueStack.hpp" - #include "ci/ciObjArrayKlass.hpp" - #include "ci/ciTypeArrayKlass.hpp" -+#include "utilities/bitMap.inline.hpp" - - - // Implementation of Instruction -diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp -index bf9179f..e0696de 100644 ---- a/src/hotspot/share/opto/graphKit.cpp -+++ b/src/hotspot/share/opto/graphKit.cpp -@@ -43,6 +43,7 @@ - #include "opto/runtime.hpp" - #include "runtime/deoptimization.hpp" - #include "runtime/sharedRuntime.hpp" -+#include "utilities/bitMap.inline.hpp" - #include "utilities/macros.hpp" - #if INCLUDE_SHENANDOAHGC - #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" -diff --git a/src/hotspot/share/opto/parse1.cpp b/src/hotspot/share/opto/parse1.cpp -index 99b1a67..f94f028 100644 ---- a/src/hotspot/share/opto/parse1.cpp -+++ b/src/hotspot/share/opto/parse1.cpp -@@ -41,6 +41,7 @@ - #include "runtime/handles.inline.hpp" - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" -+#include "utilities/bitMap.inline.hpp" - #include "utilities/copy.hpp" - - // Static array so we can figure out which bytecodes stop us from compiling -diff --git a/src/hotspot/share/utilities/bitMap.hpp b/src/hotspot/share/utilities/bitMap.hpp -index c671535..e26f346 100644 ---- a/src/hotspot/share/utilities/bitMap.hpp -+++ b/src/hotspot/share/utilities/bitMap.hpp -@@ -26,6 +26,7 @@ - 
#define SHARE_VM_UTILITIES_BITMAP_HPP - - #include "memory/allocation.hpp" -+#include "runtime/atomic.hpp" - #include "utilities/align.hpp" - #include "utilities/globalDefinitions.hpp" - -@@ -95,6 +96,8 @@ class BitMap { - void set_word (idx_t word) { set_word(word, ~(bm_word_t)0); } - void clear_word(idx_t word) { _map[word] = 0; } - -+ static inline const bm_word_t load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order); -+ - // Utilities for ranges of bits. Ranges are half-open [beg, end). - - // Ranges within a single word. -@@ -194,6 +197,9 @@ class BitMap { - return (*word_addr(index) & bit_mask(index)) != 0; - } - -+ // memory_order must be memory_order_relaxed or memory_order_acquire. -+ bool par_at(idx_t index, atomic_memory_order memory_order = memory_order_acquire) const; -+ - // Align bit index up or down to the next bitmap word boundary, or check - // alignment. - static idx_t word_align_up(idx_t bit) { -@@ -210,9 +216,14 @@ class BitMap { - inline void set_bit(idx_t bit); - inline void clear_bit(idx_t bit); - -- // Atomically set or clear the specified bit. -- inline bool par_set_bit(idx_t bit); -- inline bool par_clear_bit(idx_t bit); -+ // Attempts to change a bit to a desired value. The operation returns true if -+ // this thread changed the value of the bit. It was changed with a RMW operation -+ // using the specified memory_order. The operation returns false if the change -+ // could not be set due to the bit already being observed in the desired state. -+ // The atomic access that observed the bit in the desired state has acquire -+ // semantics, unless memory_order is memory_order_relaxed or memory_order_release. -+ inline bool par_set_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); -+ inline bool par_clear_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); - - // Put the given value at the given offset. The parallel version - // will CAS the value into the bitmap and is quite a bit slower. 
-diff --git a/src/hotspot/share/utilities/bitMap.inline.hpp b/src/hotspot/share/utilities/bitMap.inline.hpp -index b10726d..7a7e2ad 100644 ---- a/src/hotspot/share/utilities/bitMap.inline.hpp -+++ b/src/hotspot/share/utilities/bitMap.inline.hpp -@@ -26,6 +26,7 @@ - #define SHARE_VM_UTILITIES_BITMAP_INLINE_HPP - - #include "runtime/atomic.hpp" -+#include "runtime/orderAccess.hpp" - #include "utilities/bitMap.hpp" - - inline void BitMap::set_bit(idx_t bit) { -@@ -38,18 +39,39 @@ inline void BitMap::clear_bit(idx_t bit) { - *word_addr(bit) &= ~bit_mask(bit); - } - --inline bool BitMap::par_set_bit(idx_t bit) { -+inline const BitMap::bm_word_t BitMap::load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order) { -+ if (memory_order == memory_order_relaxed || memory_order == memory_order_release) { -+ return Atomic::load(addr); -+ } else { -+ assert(memory_order == memory_order_acq_rel || -+ memory_order == memory_order_acquire || -+ memory_order == memory_order_conservative, -+ "unexpected memory ordering"); -+ return OrderAccess::load_acquire(addr); -+ } -+} -+ -+inline bool BitMap::par_at(idx_t index, atomic_memory_order memory_order) const { -+ verify_index(index); -+ assert(memory_order == memory_order_acquire || -+ memory_order == memory_order_relaxed, -+ "unexpected memory ordering"); -+ const volatile bm_word_t* const addr = word_addr(index); -+ return (load_word_ordered(addr, memory_order) & bit_mask(index)) != 0; -+} -+ -+inline bool BitMap::par_set_bit(idx_t bit, atomic_memory_order memory_order) { - verify_index(bit); - volatile bm_word_t* const addr = word_addr(bit); - const bm_word_t mask = bit_mask(bit); -- bm_word_t old_val = *addr; -+ bm_word_t old_val = load_word_ordered(addr, memory_order); - - do { - const bm_word_t new_val = old_val | mask; - if (new_val == old_val) { - return false; // Someone else beat us to it. - } -- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); -+ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); - if (cur_val == old_val) { - return true; // Success. - } -@@ -57,18 +79,18 @@ inline bool BitMap::par_set_bit(idx_t bit) { - } while (true); - } - --inline bool BitMap::par_clear_bit(idx_t bit) { -+inline bool BitMap::par_clear_bit(idx_t bit, atomic_memory_order memory_order) { - verify_index(bit); - volatile bm_word_t* const addr = word_addr(bit); - const bm_word_t mask = ~bit_mask(bit); -- bm_word_t old_val = *addr; -+ bm_word_t old_val = load_word_ordered(addr, memory_order); - - do { - const bm_word_t new_val = old_val & mask; - if (new_val == old_val) { - return false; // Someone else beat us to it. - } -- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); -+ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); - if (cur_val == old_val) { - return true; // Success. 
- } diff --git a/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch b/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch deleted file mode 100644 index 9c07ceaf73973bbf9d50ca88435f9de01d43c2bf..0000000000000000000000000000000000000000 --- a/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch +++ /dev/null @@ -1,472 +0,0 @@ -From aa824cddc917b1fcac41a0efe5e8c794f2d5cff9 Mon Sep 17 00:00:00 2001 -Date: Thu, 26 Mar 2020 16:17:45 +0000 -Subject: [PATCH] 8233506:ZGC: the load for Reference.get() can be converted to - a load for strong refs Summary: : LLT: JDK8233506 - Bug url: https://bugs.openjdk.java.net/browse/JDK-8233506 - ---- - src/hotspot/share/gc/shared/c2/barrierSetC2.cpp | 73 +++++++++++++++---------- - src/hotspot/share/gc/shared/c2/barrierSetC2.hpp | 7 ++- - src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp | 42 +++++--------- - src/hotspot/share/opto/graphKit.cpp | 9 +-- - src/hotspot/share/opto/graphKit.hpp | 10 ++-- - src/hotspot/share/opto/memnode.cpp | 9 ++- - src/hotspot/share/opto/memnode.hpp | 7 ++- - 7 files changed, 85 insertions(+), 72 deletions(-) - -diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -index 545275644..48fe04b08 100644 ---- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -@@ -115,10 +115,13 @@ Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) con - - Node* load; - if (in_native) { -- load = kit->make_load(control, adr, val_type, access.type(), mo); -+ load = kit->make_load(control, adr, val_type, access.type(), mo, dep, -+ requires_atomic_access, unaligned, -+ mismatched, unsafe, access.barrier_data()); - } else { - load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, -- dep, requires_atomic_access, unaligned, mismatched, unsafe); -+ dep, requires_atomic_access, unaligned, mismatched, unsafe, -+ access.barrier_data()); - } - - access.set_raw_access(load); -@@ -348,28 +351,28 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); -- load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); -+ load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo); - } else - #endif - { -- load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); -+ load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo); - } - } else { - switch (access.type()) { - case T_BYTE: { -- load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - case T_SHORT: { -- load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, 
adr_type, mo); - break; - } - case T_INT: { -- load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - case T_LONG: { -- load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - default: -@@ -377,6 +380,9 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -405,50 +411,50 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); -+ load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); -+ load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); - } - } else - #endif - { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - } - } else { - switch(access.type()) { - case T_BYTE: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_SHORT: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_INT: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, 
new_val, expected_val, mo)); -+ load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_LONG: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } -@@ -457,6 +463,9 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -478,27 +487,30 @@ Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_va - } else - #endif - { -- load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr())); -+ load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()); - } - } else { - switch (access.type()) { - case T_BYTE: -- load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_SHORT: -- load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_INT: -- load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_LONG: -- load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type); - break; - default: - ShouldNotReachHere(); - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -520,21 +532,24 @@ Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val - - switch(access.type()) { - case T_BYTE: -- load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_SHORT: -- load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_INT: -- load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_LONG: -- load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type); - break; - default: - ShouldNotReachHere(); - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - 
access.set_raw_access(load_store); - pin_atomic_op(access); - -diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -index 487988bd8..8b4be7d11 100644 ---- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -@@ -96,6 +96,7 @@ protected: - Node* _base; - C2AccessValuePtr& _addr; - Node* _raw_access; -+ uint8_t _barrier_data; - - void fixup_decorators(); - void* barrier_set_state() const; -@@ -108,7 +109,8 @@ public: - _type(type), - _base(base), - _addr(addr), -- _raw_access(NULL) -+ _raw_access(NULL), -+ _barrier_data(0) - { - fixup_decorators(); - } -@@ -122,6 +124,9 @@ public: - bool is_raw() const { return (_decorators & AS_RAW) != 0; } - Node* raw_access() const { return _raw_access; } - -+ uint8_t barrier_data() const { return _barrier_data; } -+ void set_barrier_data(uint8_t data) { _barrier_data = data; } -+ - void set_raw_access(Node* raw_access) { _raw_access = raw_access; } - virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses. - -diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -index a12973464..e178761a0 100644 ---- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -@@ -174,48 +174,36 @@ int ZBarrierSetC2::estimate_stub_size() const { - return size; - } - --static bool barrier_needed(C2Access access) { -- return ZBarrierSet::barrier_needed(access.decorators(), access.type()); --} -- --Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { -- Node* result = BarrierSetC2::load_at_resolved(access, val_type); -- if (barrier_needed(access) && access.raw_access()->is_Mem()) { -- if ((access.decorators() & ON_WEAK_OOP_REF) != 0) { -- access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak); -+static void set_barrier_data(C2Access& access) { -+ if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) { -+ if (access.decorators() & ON_WEAK_OOP_REF) { -+ access.set_barrier_data(ZLoadBarrierWeak); - } else { -- access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong); -+ access.set_barrier_data(ZLoadBarrierStrong); - } - } -+} - -- return result; -+Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { -+ set_barrier_data(access); -+ return BarrierSetC2::load_at_resolved(access, val_type); - } - - Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, - Node* new_val, const Type* val_type) const { -- Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - } - - Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, - Node* new_val, const Type* value_type) const { -- Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -- -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - } - - Node* 
ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const { -- Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - } - - bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { -diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp -index 7bf2f6cfb..a1547b42f 100644 ---- a/src/hotspot/share/opto/graphKit.cpp -+++ b/src/hotspot/share/opto/graphKit.cpp -@@ -1493,18 +1493,19 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, - bool require_atomic_access, - bool unaligned, - bool mismatched, -- bool unsafe) { -+ bool unsafe, -+ uint8_t barrier_data) { - assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); - const TypePtr* adr_type = NULL; // debug-mode-only argument - debug_only(adr_type = C->get_adr_type(adr_idx)); - Node* mem = memory(adr_idx); - Node* ld; - if (require_atomic_access && bt == T_LONG) { -- ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } else if (require_atomic_access && bt == T_DOUBLE) { -- ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } else { -- ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } - ld = _gvn.transform(ld); - if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) { -diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp -index 07c20bbd5..df5d18ccc 100644 ---- a/src/hotspot/share/opto/graphKit.hpp -+++ b/src/hotspot/share/opto/graphKit.hpp -@@ -518,27 +518,27 @@ class GraphKit : public Phase { - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false) { -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { - // This version computes alias_index from bottom_type - return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(), - mo, control_dependency, require_atomic_access, -- unaligned, mismatched, unsafe); -+ unaligned, mismatched, unsafe, barrier_data); - } - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false) { -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { - // This version computes alias_index from an address type - assert(adr_type != NULL, "use other make_load factory"); - return make_load(ctl, 
adr, t, bt, C->get_alias_index(adr_type), - mo, control_dependency, require_atomic_access, -- unaligned, mismatched, unsafe); -+ unaligned, mismatched, unsafe, barrier_data); - } - // This is the base version which is given an alias index. - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false); -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - - // Create & transform a StoreNode and store the effect into the - // parser's memory state. -diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp -index ee0f09e11..ff0a5726c 100644 ---- a/src/hotspot/share/opto/memnode.cpp -+++ b/src/hotspot/share/opto/memnode.cpp -@@ -808,7 +808,7 @@ bool LoadNode::is_immutable_value(Node* adr) { - //----------------------------LoadNode::make----------------------------------- - // Polymorphic factory method: - Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - Compile* C = gvn.C; - - // sanity check the alias category against the created node type -@@ -859,6 +859,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - if (load->Opcode() == Op_LoadN) { - Node* ld = gvn.transform(load); - return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); -@@ -868,7 +869,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP - } - - LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - bool require_atomic = true; - LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); - if (unaligned) { -@@ -880,11 +881,12 @@ LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - return load; - } - - LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - bool require_atomic = true; - LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); - if (unaligned) { -@@ -896,6 +898,7 @@ LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - return load; - } - -diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp -index 7468abdbc..14a4a67c6 100644 ---- a/src/hotspot/share/opto/memnode.hpp -+++ b/src/hotspot/share/opto/memnode.hpp -@@ 
-227,7 +227,8 @@ public: - static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, - const TypePtr* at, const Type *rt, BasicType bt, - MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, -+ uint8_t barrier_data = 0); - - virtual uint hash() const; // Check the type - -@@ -408,7 +409,7 @@ public: - bool require_atomic_access() const { return _require_atomic_access; } - static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - #ifndef PRODUCT - virtual void dump_spec(outputStream *st) const { - LoadNode::dump_spec(st); -@@ -460,7 +461,7 @@ public: - bool require_atomic_access() const { return _require_atomic_access; } - static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - #ifndef PRODUCT - virtual void dump_spec(outputStream *st) const { - LoadNode::dump_spec(st); --- -2.12.3 - diff --git a/Add-KAE-implementation.patch b/Add-KAE-implementation.patch index db5193ad1e4cc9000d4ab544cacccf47b1e0404b..f91f5ba9473c880f53e1e6ccdcce569b01e723f4 100644 --- a/Add-KAE-implementation.patch +++ b/Add-KAE-implementation.patch @@ -158,8 +158,8 @@ index 6672d26a5..c3e8ceb35 100644 +JDKOPT_DETECT_KAE JDKOPT_DETECT_INTREE_EC - JDKOPT_ENABLE_DISABLE_FAILURE_HANDLER - JDKOPT_ENABLE_DISABLE_GENERATE_CLASSLIST + LIB_TESTS_ENABLE_DISABLE_FAILURE_HANDLER + diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4 index 9d64b31bf..e20eafa60 100644 --- a/make/autoconf/jdk-options.m4 diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/Add-riscv64-support.patch similarity index 83% rename from 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch rename to Add-riscv64-support.patch index 13815b71215d789e75976982277c6f8b05762627..b06b0fe866fb0ae52a579323d4ef4e80effd0c9b 100644 --- a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +++ b/Add-riscv64-support.patch @@ -1,435 +1,163 @@ -From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 -From: misaka00251 -Date: Wed, 9 Aug 2023 02:24:23 +0800 -Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) - ---- - make/autoconf/build-aux/config.sub | 7 + - make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 4 +- - make/autoconf/platform.m4 | 10 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- - src/hotspot/cpu/aarch64/aarch64.ad | 40 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- - .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + - .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + - src/hotspot/cpu/arm/arm.ad | 10 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- - src/hotspot/cpu/ppc/ppc.ad | 16 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 
+++ - .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ - .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + - src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + - src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + - .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + - .../cardTableBarrierSetAssembler_riscv.cpp | 120 + - .../cardTableBarrierSetAssembler_riscv.hpp | 43 + - .../modRefBarrierSetAssembler_riscv.cpp | 54 + - .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + - src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + - src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + - src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + - .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + - .../cpu/riscv/register_definitions_riscv.cpp | 193 + - src/hotspot/cpu/riscv/register_riscv.cpp | 69 + - src/hotspot/cpu/riscv/register_riscv.hpp | 337 + - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + 
- src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + - src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 605 + - src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + - .../templateInterpreterGenerator_riscv.cpp | 1841 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ - src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + - .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- - src/hotspot/cpu/s390/s390.ad | 16 +- - src/hotspot/cpu/sparc/sparc.ad | 10 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- - src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + - src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + - src/hotspot/cpu/x86/x86.ad | 14 +- - src/hotspot/cpu/x86/x86_32.ad | 19 +- - src/hotspot/cpu/x86/x86_64.ad | 24 +- - src/hotspot/os/linux/os_linux.cpp | 11 +- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + - .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + - .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + - .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + - .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + - .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - .../linux_riscv/vm_version_linux_riscv.cpp | 116 + - src/hotspot/share/adlc/archDesc.cpp | 5 + - src/hotspot/share/adlc/formssel.cpp | 2 + - src/hotspot/share/c1/c1_LIR.cpp | 113 +- - src/hotspot/share/c1/c1_LIR.hpp | 208 +- - src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- - src/hotspot/share/classfile/vmSymbols.cpp | 2 + - src/hotspot/share/classfile/vmSymbols.hpp | 1 + - .../gc/shenandoah/shenandoahArguments.cpp | 2 +- - .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/c2compiler.cpp | 1 + - src/hotspot/share/opto/chaitin.cpp | 90 +- - src/hotspot/share/opto/chaitin.hpp | 32 +- - src/hotspot/share/opto/intrinsicnode.hpp | 5 +- - src/hotspot/share/opto/library_call.cpp | 13 +- - src/hotspot/share/opto/machnode.cpp | 2 +- - src/hotspot/share/opto/machnode.hpp | 4 + - src/hotspot/share/opto/matcher.cpp | 41 +- - src/hotspot/share/opto/matcher.hpp | 6 +- - src/hotspot/share/opto/node.cpp | 21 + - src/hotspot/share/opto/node.hpp | 5 + - src/hotspot/share/opto/opcodes.cpp | 4 +- - src/hotspot/share/opto/opcodes.hpp | 2 + - src/hotspot/share/opto/phase.cpp | 2 + - src/hotspot/share/opto/phase.hpp | 1 + - src/hotspot/share/opto/postaloc.cpp | 53 +- - src/hotspot/share/opto/regmask.cpp | 46 +- - src/hotspot/share/opto/regmask.hpp | 10 +- - src/hotspot/share/opto/superword.cpp | 7 +- - src/hotspot/share/opto/type.cpp | 14 +- - 
src/hotspot/share/opto/type.hpp | 12 +- - src/hotspot/share/opto/vectornode.cpp | 4 +- - .../share/runtime/abstract_vm_version.cpp | 12 +- - src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 2 +- - src/hotspot/share/utilities/debug.cpp | 1 + - src/hotspot/share/utilities/macros.hpp | 26 + - .../share/classes/java/lang/StringLatin1.java | 5 + - .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 2 + - .../linux/native/libsaproc/ps_proc.c | 4 + - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + - .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 11 +- - .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + - .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../debugger/proc/ProcDebuggerLocal.java | 6 + - .../proc/riscv64/ProcRISCV64Thread.java | 88 + - .../riscv64/ProcRISCV64ThreadContext.java | 48 + - .../riscv64/ProcRISCV64ThreadFactory.java | 46 + - .../remote/riscv64/RemoteRISCV64Thread.java | 55 + - .../riscv64/RemoteRISCV64ThreadContext.java | 48 + - .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../riscv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 3 + - .../LinuxRISCV64JavaThreadPDAccess.java | 132 + - .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + - .../riscv64/RISCV64JavaCallWrapper.java | 58 + - .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- - src/utils/hsdis/hsdis.c | 6 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + - .../testcases/GenericTestCaseForOtherCPU.java | 10 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + - .../string/TestStringLatin1IndexOfChar.java | 153 + - .../loopopts/superword/ProdRed_Double.java | 2 +- - .../loopopts/superword/ProdRed_Float.java | 2 +- - .../loopopts/superword/ProdRed_Int.java | 2 +- - .../loopopts/superword/ReductionPerf.java | 2 +- - .../superword/SumRedAbsNeg_Double.java | 2 +- - .../superword/SumRedAbsNeg_Float.java | 2 +- - .../loopopts/superword/SumRedSqrt_Double.java | 2 +- - .../loopopts/superword/SumRed_Double.java | 2 +- - .../loopopts/superword/SumRed_Float.java | 2 +- - .../loopopts/superword/SumRed_Int.java | 2 +- - .../argumentcorruption/CheckLongArgs.java | 2 +- - .../criticalnatives/lookup/LookUp.java | 2 +- - .../sha/predicate/IntrinsicPredicates.java | 9 +- - .../NMT/CheckForProperDetailStackTrace.java | 3 +- - .../ReservedStack/ReservedStackTest.java | 3 +- - test/hotspot/jtreg/test_env.sh | 5 + - ...stMutuallyExclusivePlatformPredicates.java | 3 +- - .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- - test/lib/jdk/test/lib/Platform.java | 5 + - .../bench/java/lang/StringIndexOfChar.java | 221 + - 218 files changed, 57653 insertions(+), 221 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/c1_Defs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp - create 
mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/riscv.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java - create mode 100644 
src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java - create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java - +diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml +index 385b097b9f..b1c333f711 100644 +--- a/.github/workflows/build-cross-compile.yml ++++ b/.github/workflows/build-cross-compile.yml +@@ -54,28 +54,39 @@ jobs: + - arm + - s390x + - ppc64le ++ - riscv64 + include: + - target-cpu: aarch64 + gnu-arch: aarch64 + debian-arch: arm64 + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: arm + gnu-arch: arm + debian-arch: armhf + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + gnu-abi: eabihf + - target-cpu: s390x + gnu-arch: s390x + debian-arch: s390x + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: ppc64le + gnu-arch: powerpc64le + debian-arch: ppc64el + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false ++ - target-cpu: riscv64 ++ gnu-arch: riscv64 ++ debian-arch: riscv64 ++ debian-repository: https://httpredir.debian.org/debian/ ++ debian-version: sid ++ tolerate-sysroot-errors: true + + steps: + - name: 'Checkout the JDK source' +@@ -113,6 +124,7 @@ jobs: + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Create sysroot' ++ id: create-sysroot + run: > + sudo debootstrap + --arch=${{ matrix.debian-arch }} +@@ -123,6 +135,7 @@ jobs: + ${{ matrix.debian-version }} + sysroot + ${{ matrix.debian-repository }} ++ continue-on-error: ${{ matrix.tolerate-sysroot-errors }} + if: 
steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Prepare sysroot' +@@ -134,7 +147,12 @@ jobs: + rm -rf sysroot/usr/{sbin,bin,share} + rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd} + rm -rf sysroot/usr/libexec/gcc +- if: steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ ++ - name: 'Remove broken sysroot' ++ run: | ++ sudo rm -rf sysroot/ ++ if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Configure' + run: > +@@ -153,6 +171,7 @@ jobs: + echo "Dumping config.log:" && + cat config.log && + exit 1) ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' + + - name: 'Build' + id: build +@@ -160,3 +179,4 @@ jobs: + with: + make-target: 'hotspot ${{ inputs.make-arguments }}' + platform: linux-${{ matrix.target-cpu }} ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' +diff --git a/.jcheck/conf b/.jcheck/conf +index be7ad6d26f..e35eb77696 100644 +--- a/.jcheck/conf ++++ b/.jcheck/conf +@@ -1,5 +1,5 @@ + [general] +-project=jdk-updates ++project=riscv-port + jbs=JDK + version=11.0.24 + diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub -index 3c280ac7c..eda408e01 100644 +index 3c280ac7c0..6c66c221e0 100644 --- a/make/autoconf/build-aux/config.sub +++ b/make/autoconf/build-aux/config.sub -@@ -48,6 +48,13 @@ if ! echo $* | grep '^aarch64-' >/dev/null ; then +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then exit fi +# Canonicalize for riscv which autoconf-config.sub doesn't handle -+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then ++if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then + result=`echo $@ | sed 's/linux/unknown-linux/'` + echo $result + exit +fi + - while test $# -gt 0 ; do - case $1 in - -- ) # Stop option processing + # First, filter out everything that doesn't begin with "aarch64-" + if ! echo $* | grep '^aarch64-' >/dev/null ; then + . 
$DIR/autoconf-config.sub "$@" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2..01ef26c10 100644 +index 9bb34363e5..f84e8f84c6 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 -@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], AC_MSG_CHECKING([if shenandoah can be built]) if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ -+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" -diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc..c01fdbcce 100644 ---- a/make/autoconf/libraries.m4 -+++ b/make/autoconf/libraries.m4 -@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - GLOBAL_LIBS="" - fi - -- BASIC_JDKLIB_LIBS="" -+ BASIC_JDKLIB_LIBS="-latomic" - if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then - BASIC_JDKLIB_LIBS="-ljava -ljvm" - fi -@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - wsock32.lib winmm.lib version.lib psapi.lib" - fi - -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ - JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" - JDKEXE_LIBS="" - JVM_LIBS="$BASIC_JVM_LIBS" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5f..48d615992 100644 +index 5d1d9efa39..565ca18e20 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 -@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], - VAR_CPU_BITS=64 - VAR_CPU_ENDIAN=little - ;; -+ riscv32) -+ VAR_CPU=riscv32 -+ VAR_CPU_ARCH=riscv -+ VAR_CPU_BITS=32 -+ VAR_CPU_ENDIAN=little -+ ;; - riscv64) - VAR_CPU=riscv64 - VAR_CPU_ARCH=riscv -@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], - HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xs390x; then - HOTSPOT_$1_CPU_DEFINE=S390 -+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then -+ HOTSPOT_$1_CPU_DEFINE=RISCV32 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then -- HOTSPOT_$1_CPU_DEFINE=RISCV +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xriscv64; then + HOTSPOT_$1_CPU_DEFINE=RISCV64 - elif test "x$OPENJDK_$1_CPU" != x; then - HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) - fi + + # The cpu defines below are for zero, we don't support them directly. + elif test "x$OPENJDK_$1_CPU" = xsparc; then diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac572..9de6f663c 100644 +index c5a3ac5724..51137b99db 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -439,20 +167,12 @@ index c5a3ac572..9de6f663c 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) -+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ -+ ))) -+ endif -+ + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif @@ -460,303 +180,14 @@ index c5a3ac572..9de6f663c 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 1e4ee33a9..ac5d56f0f 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return size; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg. 
- const uint Matcher::vector_ideal_reg(int len) { - switch(len) { -@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, - ins_pipe(pipe_class_memory); - %} - --instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} - - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - ins_pipe(pipe_class_memory); - %} - -+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); -+ -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ - instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, rFlagsReg cr) - %{ -diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3..1a35be210 100644 ---- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); - - Assembler::Condition acond, ncond; - switch (condition) { -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 5753cc9a6..21c6fdf19 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, - BIND(DONE); - } - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3) -+{ -+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; -+ Register cnt1_neg = cnt1; -+ Register ch1 = rscratch1; -+ Register result_tmp = rscratch2; -+ -+ cbz(cnt1, NOMATCH); -+ -+ cmp(cnt1, (u1)8); -+ br(LT, DO1_SHORT); -+ -+ orr(ch, ch, ch, LSL, 8); -+ orr(ch, ch, ch, LSL, 16); -+ orr(ch, ch, ch, LSL, 32); -+ 
-+ sub(cnt1, cnt1, 8); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ -+ mov(tmp3, 0x0101010101010101); -+ -+ BIND(CH1_LOOP); -+ ldr(ch1, Address(str1, cnt1_neg)); -+ eor(ch1, ch, ch1); -+ sub(tmp1, ch1, tmp3); -+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); -+ bics(tmp1, tmp1, tmp2); -+ br(NE, HAS_ZERO); -+ adds(cnt1_neg, cnt1_neg, 8); -+ br(LT, CH1_LOOP); -+ -+ cmp(cnt1_neg, (u1)8); -+ mov(cnt1_neg, 0); -+ br(LT, CH1_LOOP); -+ b(NOMATCH); -+ -+ BIND(HAS_ZERO); -+ rev(tmp1, tmp1); -+ clz(tmp1, tmp1); -+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); -+ b(MATCH); -+ -+ BIND(DO1_SHORT); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ BIND(DO1_LOOP); -+ ldrb(ch1, Address(str1, cnt1_neg)); -+ cmp(ch, ch1); -+ br(EQ, MATCH); -+ adds(cnt1_neg, cnt1_neg, 1); -+ br(LT, DO1_LOOP); -+ BIND(NOMATCH); -+ mov(result, -1); -+ b(DONE); -+ BIND(MATCH); -+ add(result, result_tmp, cnt1_neg); -+ BIND(DONE); -+} -+ - // Compare strings. - void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7e23c16a4..c3d472a9a 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -1260,6 +1260,9 @@ public: - void string_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); -+ void stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3); - void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, - FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, -diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 51f2d9ce7..71f83521e 100644 ---- a/src/hotspot/cpu/arm/arm.ad -+++ b/src/hotspot/cpu/arm/arm.ad -@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. 
- } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { - return MaxVectorSize; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa..2d06d3d58 100644 ---- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); -+ - AsmCondition acond = al; - AsmCondition ncond = nv; - if (opr1 != opr2) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d..d081116be 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); -+ - if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { - load_to_reg(this, opr1, result); // Condition doesn't matter. - return; -diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index ebbe80a26..df66a46dc 100644 ---- a/src/hotspot/cpu/ppc/ppc.ad -+++ b/src/hotspot/cpu/ppc/ppc.ad -@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. 
- } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // PPC implementation uses VSX load/store instructions (if - // SuperwordUseVSX) which support 4 byte but not arbitrary alignment - const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 000000000..5661b7425 +index 0000000000..31c63abe71 --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp -@@ -0,0 +1,185 @@ +@@ -0,0 +1,177 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -784,13 +215,13 @@ index 000000000..5661b7425 +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" ++#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + -+ +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { @@ -880,7 +311,6 @@ index 000000000..5661b7425 + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state -+ + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * @@ -894,14 +324,6 @@ index 000000000..5661b7425 + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp -+ // -+ // The interpreted method entry on riscv aligns SP to 16 bytes -+ // before generating the fixed part of the activation frame. So there -+ // may be a gap between the locals block and the saved sender SP. For -+ // an interpreted caller we need to recreate this gap and exactly -+ // align the incoming parameters with the caller's temporary -+ // expression stack. For other types of caller frame it doesn't -+ // matter. + intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -935,6 +357,7 @@ index 000000000..5661b7425 + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } ++ + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = @@ -942,14 +365,14 @@ index 000000000..5661b7425 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 000000000..40ecf1a6c +index 0000000000..a83d43a8f1 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -0,0 +1,365 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -969,6 +392,7 @@ index 000000000..40ecf1a6c + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + +#include @@ -983,8 +407,6 @@ index 000000000..40ecf1a6c +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + -+#define __ _masm. -+ +int AbstractAssembler::code_fill_byte() { + return 0; +} @@ -999,7 +421,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -1019,7 +441,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -1033,11 +455,12 @@ index 000000000..40ecf1a6c + add_uw(Rd, Rs, zr); +} + -+void Assembler::li(Register Rd, int64_t imm) { ++void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; -+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ // Split imm to a lower 12-bit sign-extended part and the remainder, ++ // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + @@ -1051,8 +474,7 @@ index 000000000..40ecf1a6c + if (lower != 0) { + addi(Rd, Rd, lower); + } -+ } -+ else { ++ } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { @@ -1066,30 +488,30 @@ index 000000000..40ecf1a6c +} + +void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), -+ // upper = imm[63:32] + 1. -+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); -+ -+ // Load the rest 32 bits. -+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); ++ // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or ++ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); ++ ++ // Load the rest 32 bits. 
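Both _li above and the tail of li64 below rely on the same splitting idea: because addi sign-extends its 12-bit operand, the low chunk is taken as a signed value and the part handed to lui absorbs the resulting borrow. A small standalone sketch of the 32-bit split; the helper name and the worked constant are illustrative, and the shift expression deliberately mirrors the one used in _li:

    #include <cstdint>

    // Split a 32-bit immediate so that upper + sign_extend_12(lower) == imm,
    // with upper a multiple of 0x1000 suitable for lui and lower a signed
    // 12-bit value suitable for addi.
    static void split_simm32(int32_t imm, int32_t &upper, int32_t &lower) {
      lower = (imm << 20) >> 20;   // sign-extended low 12 bits
      upper = imm - lower;         // a negative lower is folded in as a borrow here
    }

    // Example: imm = 0x12345FFF gives lower = -1 and upper = 0x12346000,
    // so lui materializes 0x12346000 and addi then subtracts 1.

For the remaining low 32 bits of a full 64-bit constant, li64 below shifts the value into place twelve, twelve and eight bits at a time with slli/addi pairs.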
++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { @@ -1162,15 +584,16 @@ index 000000000..40ecf1a6c + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ -+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ -+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ int32_t offset = 0; \ ++ baseOffset(temp, adr, offset); \ ++ jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ @@ -1245,7 +668,7 @@ index 000000000..40ecf1a6c + addi(Rd, Rd, (imm64 >> 6) & 0x7ff); + slli(Rd, Rd, 6); + -+ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. ++ // This offset will be used by following jalr/ld. + offset = imm64 & 0x3f; +} + @@ -1313,14 +736,14 @@ index 000000000..40ecf1a6c +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 000000000..d4da30ed6 +index 0000000000..9e7d271860 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,2004 @@ +@@ -0,0 +1,3057 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1348,6 +771,7 @@ index 000000000..d4da30ed6 + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" ++#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + @@ -1359,10 +783,10 @@ index 000000000..d4da30ed6 +class Argument { + public: + enum { -+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) -+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + -+ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; @@ -1386,7 +810,21 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + -+// java function register(caller-save registers) ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. 
A suitable jni method is non-static and a ++// small number of arguments. ++// ++// |------------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |------------------------------------------------------------------------| ++// | x10 x11 x12 x13 x14 x15 x16 x17 | ++// |------------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |------------------------------------------------------------------------| ++ +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -1396,6 +834,8 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + ++// Java floating args are passed as per C ++ +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -1412,6 +852,9 @@ index 000000000..d4da30ed6 +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register @@ -1434,9 +877,6 @@ index 000000000..d4da30ed6 +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + -+/* If you use x4(tp) as java thread pointer according to the instruction manual, -+ * it overlaps with the register used by c++ thread. -+ */ +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer @@ -1446,13 +886,13 @@ index 000000000..d4da30ed6 +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + -+// tempory register(caller-save registers) ++// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -1469,6 +909,7 @@ index 000000000..d4da30ed6 + + private: + Register _base; ++ Register _index; + int64_t _offset; + enum mode _mode; + @@ -1481,46 +922,49 @@ index 000000000..d4da30ed6 + + public: + Address() -+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { } ++ : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) -+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned int o) -+ : _base(r), 
_offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } +#ifdef ASSERT + Address(Register r, ByteSize disp) -+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } +#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), ++ _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), -+ _target(target) { } ++ _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { -+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); ++ guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } -+ ++ Register index() const { ++ return _index; ++ } + mode getMode() const { + return _mode; + } + -+ bool uses(Register reg) const { return _base == reg;} ++ bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { @@ -1584,34 +1028,40 @@ index 000000000..d4da30ed6 + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. 
+ }; + -+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ assert_different_registers(noreg, temp, base); -+ int64_t upper = offset, lower = offset; -+ -+ int8_t shift = 64 - expect_offbits; -+ lower = (offset << shift) >> shift; -+ upper -= lower; -+ -+ li(temp, upper); -+ add(temp, temp, base); -+ return Address(temp, lower); ++ void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ guarantee(Rd != adr.base(), "should use different registers!"); ++ if (is_offset_in_range(adr.offset(), 32)) { ++ int32_t imm = adr.offset(); ++ int32_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ lui(Rd, upper); ++ offset = lower; ++ } else { ++ movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); ++ } ++ add(Rd, Rd, adr.base()); + } + -+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ if (is_offset_in_range(offset, expect_offbits)) { -+ return Address(base, offset); ++ void baseOffset(Register Rd, const Address &adr, int32_t &offset) { ++ if (is_offset_in_range(adr.offset(), 12)) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ addi(Rd, adr.base(), adr.offset()); ++ offset = 0; ++ } else { ++ baseOffset32(Rd, adr, offset); + } -+ return form_address_complex(base, offset, expect_offbits, temp); + } + -+ void li(Register Rd, int64_t imm); // optimized load immediate ++ void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); + void j(const address &dest, Register temp = t0); -+ void j(const Address &adr, Register temp = t0) ; ++ void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); @@ -1633,7 +1083,7 @@ index 000000000..d4da30ed6 + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ uint32_t mask = checked_cast(right_n_bits(nbits)); ++ uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; @@ -1650,8 +1100,8 @@ index 000000000..d4da30ed6 + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ guarantee(val < (1ULL << nbits), "Field too big for insn"); -+ unsigned mask = checked_cast(right_n_bits(nbits)); ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; @@ -1680,11 +1130,11 @@ index 000000000..d4da30ed6 + emit_int32((jint)insn); + } + -+ void halt() { ++ void _halt() { + emit_int32(0); + } + -+// Rigster Instruction ++// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ @@ -1697,18 +1147,18 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(add, 0b0110011, 0b000, 0b0000000); -+ INSN(sub, 0b0110011, 0b000, 0b0100000); -+ INSN(andr, 0b0110011, 0b111, 0b0000000); -+ INSN(orr, 0b0110011, 0b110, 0b0000000); -+ INSN(xorr, 0b0110011, 0b100, 0b0000000); ++ INSN(_add, 0b0110011, 0b000, 0b0000000); ++ 
INSN(_sub, 0b0110011, 0b000, 0b0100000); ++ INSN(_andr, 0b0110011, 0b111, 0b0000000); ++ INSN(_orr, 0b0110011, 0b110, 0b0000000); ++ INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); -+ INSN(addw, 0b0111011, 0b000, 0b0000000); -+ INSN(subw, 0b0111011, 0b000, 0b0100000); ++ INSN(_addw, 0b0111011, 0b000, 0b0000000); ++ INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -1726,22 +1176,20 @@ index 000000000..d4da30ed6 + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + -+ // Vector Configuration Instruction -+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); -+ +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ ++ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(pc(), InternalAddress(dest).rspec()); ++ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1749,7 +1197,19 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(_lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ INSN(_ld, 0b0000011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1766,7 +1226,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ @@ -1776,7 +1236,14 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? 
temp : Rd)); \ ++ int32_t offset = 0; \ ++ if (Rd == adr.base()) { \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ ++ } else { \ ++ baseOffset32(Rd, adr, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ + } \ + break; \ + } \ @@ -1788,20 +1255,20 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, &Assembler::NAME); \ + } + -+ INSN(lb, 0b0000011, 0b000); -+ INSN(lbu, 0b0000011, 0b100); -+ INSN(ld, 0b0000011, 0b011); -+ INSN(lh, 0b0000011, 0b001); -+ INSN(lhu, 0b0000011, 0b101); -+ INSN(lw, 0b0000011, 0b010); -+ INSN(lwu, 0b0000011, 0b110); ++ INSN(lb); ++ INSN(lbu); ++ INSN(lh); ++ INSN(lhu); ++ INSN(lw); ++ INSN(lwu); ++ INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1809,7 +1276,14 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(_fld, 0b0000111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1826,7 +1300,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ @@ -1836,7 +1310,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ + } \ + break; \ + } \ @@ -1845,14 +1321,14 @@ index 000000000..d4da30ed6 + } \ + } + -+ INSN(flw, 0b0000111, 0b010); -+ INSN(fld, 0b0000111, 0b011); ++ INSN(flw); ++ INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ @@ -1867,7 +1343,18 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_beq, 0b1100011, 0b000); ++ INSN(_bne, 0b1100011, 0b001); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ @@ -1878,12 +1365,12 @@ index 000000000..d4da30ed6 + NAME(Rs1, Rs2, dest); \ + } + -+ INSN(beq, 0b1100011, 0b000); -+ INSN(bge, 0b1100011, 0b101); -+ INSN(bgeu, 0b1100011, 0b111); -+ INSN(blt, 0b1100011, 0b100); -+ INSN(bltu, 0b1100011, 0b110); -+ INSN(bne, 0b1100011, 0b001); ++ INSN(beq); ++ INSN(bne); ++ INSN(bge); ++ INSN(bgeu); ++ INSN(blt); ++ INSN(bltu); + +#undef INSN + @@ -1903,8 +1390,8 @@ index 000000000..d4da30ed6 + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ @@ -1916,16 +1403,27 @@ index 000000000..d4da30ed6 + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(_sw, Register, 0b0100011, 0b010); ++ INSN(_sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(_fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + -+ INSN(sb, Register, 0b0100011, 0b000); -+ INSN(sh, Register, 0b0100011, 0b001); -+ INSN(sw, Register, 0b0100011, 0b010); -+ INSN(sd, Register, 0b0100011, 0b011); -+ INSN(fsw, FloatRegister, 0b0100111, 0b010); -+ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ INSN(sb, Register); ++ INSN(sh, Register); ++ INSN(sw, Register); ++ INSN(sd, Register); ++ INSN(fsw, FloatRegister); ++ INSN(fsd, FloatRegister); + +#undef INSN + @@ -1944,7 +1442,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ @@ -1955,8 +1453,10 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ ++ int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -1986,7 +1486,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ @@ -1996,7 +1496,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -2050,8 +1552,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -2059,7 +1561,13 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_jal, 0b1101111); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ @@ -2077,7 +1585,7 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, temp, &Assembler::NAME); 
\ + } + -+ INSN(jal, 0b1101111); ++ INSN(jal); + +#undef INSN + @@ -2085,8 +1593,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ @@ -2096,7 +1604,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(jalr, 0b1100111, 0b000); ++ INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + @@ -2131,7 +1639,8 @@ index 000000000..d4da30ed6 + } + + INSN(ecall, 0b1110011, 0b000, 0b000000000000); -+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++ INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); ++ +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -2239,12 +1748,12 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(addi, 0b0010011, 0b000); -+ INSN(slti, 0b0010011, 0b010); -+ INSN(addiw, 0b0011011, 0b000); -+ INSN(and_imm12, 0b0010011, 0b111); -+ INSN(ori, 0b0010011, 0b110); -+ INSN(xori, 0b0010011, 0b100); ++ INSN(_addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(_addiw, 0b0011011, 0b000); ++ INSN(_and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); + +#undef INSN + @@ -2278,9 +1787,9 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(slli, 0b0010011, 0b001, 0b000000); -+ INSN(srai, 0b0010011, 0b101, 0b010000); -+ INSN(srli, 0b0010011, 0b101, 0b000000); ++ INSN(_slli, 0b0010011, 0b001, 0b000000); ++ INSN(_srai, 0b0010011, 0b101, 0b010000); ++ INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + @@ -2316,7 +1825,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(lui, 0b0110111); ++ INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN @@ -2592,6 +2101,23 @@ index 000000000..d4da30ed6 + +#undef patch_vtype + ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ // Vector Configuration Instruction ++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); ++ ++#undef INSN ++ +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 @@ -3159,7 +2685,7 @@ index 000000000..d4da30ed6 + +// ==================================== +// RISC-V Bit-Manipulation Extension -+// Currently only support Zba and Zbb. ++// Currently only support Zba, Zbb and Zbs bitmanip extensions. 
+// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3234,11 +2760,12 @@ index 000000000..d4da30ed6 + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ INSN(bexti, 0b0010011, 0b101, 0b010010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ -+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ @@ -3251,9 +2778,966 @@ index 000000000..d4da30ed6 + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); -+ ++ +#undef INSN + ++// ======================================== ++// RISC-V Compressed Instructions Extension ++// ======================================== ++// Note: ++// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be ++// transformed to 16-bit instructions if compressible. ++// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', ++// but most of time we have no need to explicitly use these instructions. ++// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range ++// are qualified to be compressed with their 2-byte versions. ++// An example: ++// ++// CompressibleRegion cr(_masm); ++// __ andr(...); // this instruction could change to c.and if able to ++// ++// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from ++// normal ones. ++// ++ ++private: ++ bool _in_compressible_region; ++public: ++ bool in_compressible_region() const { return _in_compressible_region; } ++ void set_in_compressible_region(bool b) { _in_compressible_region = b; } ++public: ++ ++ // a compressible region ++ class CompressibleRegion : public StackObj { ++ protected: ++ Assembler *_masm; ++ bool _saved_in_compressible_region; ++ public: ++ CompressibleRegion(Assembler *_masm) ++ : _masm(_masm) ++ , _saved_in_compressible_region(_masm->in_compressible_region()) { ++ _masm->set_in_compressible_region(true); ++ } ++ ~CompressibleRegion() { ++ _masm->set_in_compressible_region(_saved_in_compressible_region); ++ } ++ }; ++ ++ // patch a 16-bit instruction. 
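The notes above make compression an explicit, scoped opt-in rather than a global rewrite. A hedged usage sketch, assuming UseRVC is enabled; the emit_masked_sum helper, the __ shorthand for _masm-> and the register constraints quoted in the comments are illustrative, not code from this patch:

    #define __ _masm->

    // While the region object is live, every instruction emitted through the
    // assembler may be replaced by its 2-byte RVC form when the operands
    // allow it; otherwise the ordinary 4-byte encoding is produced.
    static void emit_masked_sum(Assembler* _masm, Register rd, Register rs1, Register rs2) {
      Assembler::CompressibleRegion cr(_masm);
      __ add(rd, rs1, rs2);   // c.add when rd aliases rs1 or rs2 and neither source is x0
      __ andr(rd, rd, rs2);   // c.and when both registers fall in x8..x15
    }

    #undef __

The c_patch helpers that follow fill the individual bit fields of those 16-bit encodings.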
++ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { ++ assert_cond(a != NULL); ++ assert_cond(msb >= lsb && msb <= 15); ++ unsigned nbits = msb - lsb + 1; ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ uint16_t mask = (1U << nbits) - 1; ++ val <<= lsb; ++ mask <<= lsb; ++ uint16_t target = *(uint16_t *)a; ++ target &= ~mask; ++ target |= val; ++ *(uint16_t *)a = target; ++ } ++ ++ static void c_patch(address a, unsigned bit, uint16_t val) { ++ c_patch(a, bit, bit, val); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++// -------------- RVC Instruction Definitions -------------- ++ ++ void c_nop() { ++ c_addi(x0, 0); ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi, 0b000, 0b01); ++ INSN(c_addiw, 0b001, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 10, 0)); \ ++ assert_cond((imm & 0b1111) == 0); \ ++ assert_cond(imm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ ++ c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ ++ c_patch_reg((address)&insn, 7, sp); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi16sp, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(uimm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ ++ c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi4spn, 0b000, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void 
NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_slli, 0b000, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_srli, 0b100, 0b00, 0b01); ++ INSN(c_srai, 0b100, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_andi, 0b100, 0b10, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct6, funct2, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 6, 5, funct2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 10, funct6); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sub, 0b100011, 0b00, 0b01); ++ INSN(c_xor, 0b100011, 0b01, 0b01); ++ INSN(c_or, 0b100011, 0b10, 0b01); ++ INSN(c_and, 0b100011, 0b11, 0b01); ++ INSN(c_subw, 0b100111, 0b00, 0b01); ++ INSN(c_addw, 0b100111, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_mv, 0b1000, 0b10); ++ INSN(c_add, 0b1001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rs1) { \ ++ assert_cond(Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, x0); \ ++ c_patch_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_jr, 0b1000, 0b10); ++ INSN(c_jalr, 0b1001, 0b10); ++ ++#undef INSN ++ ++ typedef void (Assembler::* j_c_insn)(address dest); ++ typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); ++ ++ void wrap_label(Label &L, j_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(pc()); ++ } ++ } ++ ++ void wrap_label(Label &L, Register r, 
compare_and_branch_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(r, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r, pc()); ++ } ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t offset) { \ ++ assert_cond(is_imm_in_range(offset, 11, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ ++ c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ ++ c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ ++ c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ ++ c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ ++ c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 11, 1)); \ ++ c_j(distance); \ ++ } \ ++ void NAME(Label &L) { \ ++ wrap_label(L, &Assembler::NAME); \ ++ } ++ ++ INSN(c_j, 0b101, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 8, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ ++ c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(Register Rs1, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 8, 1)); \ ++ NAME(Rs1, distance); \ ++ } \ ++ void NAME(Register Rs1, Label &L) { \ ++ wrap_label(L, Rs1, &Assembler::NAME); \ ++ } ++ ++ INSN(c_beqz, 0b110, 0b01); ++ INSN(c_bnez, 0b111, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 18, 0)); \ ++ assert_cond((imm & 0xfff) == 0); \ ++ assert_cond(imm != 0); \ ++ assert_cond(Rd != x0 && Rd != x2); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lui, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_li, 0b010, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 
0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ldsp, 0b011, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(FloatRegister Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_fldsp, 0b001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ld, 0b011, 0b00, Register); ++ INSN(c_sd, 0b111, 0b00, Register); ++ INSN(c_fld, 0b001, 0b00, FloatRegister); ++ INSN(c_fsd, 0b101, 0b00, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sdsp, 0b111, 0b10, Register); ++ INSN(c_fsdsp, 0b101, 0b10, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_swsp, 0b110, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 
15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lwsp, 0b010, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lw, 0b010, 0b00); ++ INSN(c_sw, 0b110, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME() { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 11, 2, 0x0); \ ++ c_patch((address)&insn, 12, 12, 0b1); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ebreak, 0b100, 0b10); ++ ++#undef INSN ++ ++// -------------- RVC Transformation Functions -------------- ++ ++// -------------------------- ++// Register instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* add -> c.add */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ c_add(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ _add(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(add); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* sub/subw -> c.sub/c.subw */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ ++ C_NAME(Rd, Rs2); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(sub, c_sub, _sub); ++ INSN(subw, c_subw, _subw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ ++ ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ C_NAME(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(andr, c_and, _andr); ++ INSN(orr, c_or, _orr); ++ INSN(xorr, c_xor, _xorr); ++ INSN(addw, c_addw, _addw); ++ ++#undef INSN ++ ++private: ++// some helper functions ++ bool do_compress() const { ++ return UseRVC && in_compressible_region(); ++ } ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0 && \ ++ (!ld || rd_rs2 != x0); \ ++ } \ ++ ++ FUNC(is_c_ldsdsp, 0b111, 9); ++ FUNC(is_c_lwswsp, 0b011, 8); ++ ++#undef FUNC ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, int32_t imm12) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_fldsdsp, 0b111, 9); ++ ++#undef FUNC ++ ++#define FUNC(NAME, REG_TYPE, funct3, bits) \ ++ bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ ++ return 
rs1->is_compressed_valid() && \ ++ rd_rs2->is_compressed_valid() && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_ldsd, Register, 0b111, 8); ++ FUNC(is_c_lwsw, Register, 0b011, 7); ++ FUNC(is_c_fldsd, FloatRegister, 0b111, 8); ++ ++#undef FUNC ++ ++public: ++// -------------------------- ++// Load/store register ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* lw -> c.lwsp/c.lw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, true)) { \ ++ c_lwsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_lw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _lw(Rd, Rs, offset); \ ++ } ++ ++ INSN(lw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* ld -> c.ldsp/c.ld */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ ++ c_ldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_ld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _ld(Rd, Rs, offset); \ ++ } ++ ++ INSN(ld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fld -> c.fldsp/c.fld */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fld(Rd, Rs, offset); \ ++ } ++ ++ INSN(fld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sd -> c.sdsp/c.sd */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ ++ c_sdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_sd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sd(Rd, Rs, offset); \ ++ } ++ ++ INSN(sd); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sw -> c.swsp/c.sw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, false)) { \ ++ c_swsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_sw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sw(Rd, Rs, offset); \ ++ } ++ ++ INSN(sw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fsd -> c.fsdsp/c.fsd */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fsdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fsd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fsd(Rd, Rs, offset); \ ++ } ++ ++ INSN(fsd); ++ ++#undef INSN ++ ++// -------------------------- ++// Conditional branch instructions ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ /* beq/bne -> c.beqz/c.bnez */ \ ++ if (do_compress() && \ ++ (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ ++ is_imm_in_range(offset, 8, 1))) { \ ++ C_NAME(Rs1, offset); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rs1, Rs2, offset); \ ++ } ++ ++ INSN(beq, c_beqz, _beq); ++ INSN(bne, c_beqz, _bne); ++ ++#undef INSN ++ ++// -------------------------- ++// Unconditional branch instructions 
++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ /* jal -> c.j */ \ ++ if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ ++ c_j(offset); \ ++ return; \ ++ } \ ++ _jal(Rd, offset); \ ++ } ++ ++ INSN(jal); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* jalr -> c.jr/c.jalr */ \ ++ if (do_compress() && (offset == 0 && Rs != x0)) { \ ++ if (Rd == x1) { \ ++ c_jalr(Rs); \ ++ return; \ ++ } else if (Rd == x0) { \ ++ c_jr(Rs); \ ++ return; \ ++ } \ ++ } \ ++ _jalr(Rd, Rs, offset); \ ++ } ++ ++ INSN(jalr); ++ ++#undef INSN ++ ++// -------------------------- ++// Miscellaneous Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* ebreak -> c.ebreak */ \ ++ if (do_compress()) { \ ++ c_ebreak(); \ ++ return; \ ++ } \ ++ _ebreak(); \ ++ } ++ ++ INSN(ebreak); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* The illegal instruction in RVC is presented by a 16-bit 0. */ \ ++ if (do_compress()) { \ ++ emit_int16(0); \ ++ return; \ ++ } \ ++ _halt(); \ ++ } ++ ++ INSN(halt); ++ ++#undef INSN ++ ++// -------------------------- ++// Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int64_t imm) { \ ++ /* li -> c.li */ \ ++ if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ ++ c_li(Rd, imm); \ ++ return; \ ++ } \ ++ _li(Rd, imm); \ ++ } ++ ++ INSN(li); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ ++ if (do_compress()) { \ ++ if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ ++ c_addi(Rd, imm); \ ++ return; \ ++ } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ ++ c_mv(Rd, Rs1); \ ++ return; \ ++ } else if (Rs1 == sp && imm != 0) { \ ++ if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ ++ c_addi16sp(imm); \ ++ return; \ ++ } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ ++ c_addi4spn(Rd, imm); \ ++ return; \ ++ } \ ++ } \ ++ } \ ++ _addi(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addi); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addiw -> c.addiw */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ ++ c_addiw(Rd, imm); \ ++ return; \ ++ } \ ++ _addiw(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addiw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* and_imm12 -> c.andi */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ ++ c_andi(Rd, imm); \ ++ return; \ ++ } \ ++ _and_imm12(Rd, Rs1, imm); \ ++ } ++ ++ INSN(and_imm12); ++ ++#undef INSN ++ ++// -------------------------- ++// Shift Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* slli -> c.slli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ ++ c_slli(Rd, shamt); \ ++ return; \ ++ } \ ++ _slli(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(slli); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, 
Register Rs1, unsigned shamt) { \ ++ /* srai/srli -> c.srai/c.srli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ ++ C_NAME(Rd, shamt); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(srai, c_srai, _srai); ++ INSN(srli, c_srli, _srli); ++ ++#undef INSN ++ ++// -------------------------- ++// Upper Immediate Instruction ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ /* lui -> c.lui */ \ ++ if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ ++ c_lui(Rd, imm); \ ++ return; \ ++ } \ ++ _lui(Rd, imm); \ ++ } ++ ++ INSN(lui); ++ ++#undef INSN ++ ++// --------------------------------------------------------------------------------------- ++ + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); @@ -3273,18 +3757,17 @@ index 000000000..d4da30ed6 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // Computational pseudo instructions ++ // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); -+ ++ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + -+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++ Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, @@ -3301,21 +3784,14 @@ index 000000000..d4da30ed6 + return is_imm_in_range(imm, 12, 0); + } + -+ // The maximum range of a branch is fixed for the riscv -+ // architecture. ++ // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + -+ static Assembler::SEW elemBytes_to_sew(int esize) { -+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); -+ return (Assembler::SEW) exact_log2(esize); -+ } -+ + virtual ~Assembler() {} -+ +}; + +class BiasedLockingCounters; @@ -3323,12 +3799,12 @@ index 000000000..d4da30ed6 +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 000000000..82b825db7 +index 0000000000..7ffe880398 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
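The assembler hunks above all follow the same shape: when do_compress() is on, each mnemonic first checks whether its operands and immediate satisfy the constraints of an RVC 16-bit encoding — registers limited to x8..x15 for the non-SP forms, and a small zero-extended offset with the required alignment — and emits the compressed form, falling back to the ordinary 32-bit encoding otherwise. A minimal standalone sketch of that selection logic follows; the helper names (is_compressed_reg, fits_uimm, emit_lw) are hypothetical placeholders, not HotSpot APIs.

#include <cstdint>
#include <cstdio>

// RVC load/store forms other than the SP-relative ones can only name x8..x15.
static bool is_compressed_reg(int xreg) {
  return xreg >= 8 && xreg <= 15;
}

// Unsigned immediate that fits in `bits` bits with the required low bits clear
// (4-byte alignment for c.lw, 8-byte alignment for c.ld).
static bool fits_uimm(int64_t imm, int bits, int64_t align_mask) {
  return imm >= 0 && imm < (int64_t(1) << bits) && (imm & align_mask) == 0;
}

// lw rd, offset(rs): use the 16-bit c.lw when every constraint holds, otherwise
// fall back to the 32-bit lw.  Printing stands in for instruction emission.
static void emit_lw(int rd, int rs, int64_t offset, bool do_compress) {
  if (do_compress &&
      is_compressed_reg(rd) && is_compressed_reg(rs) &&
      fits_uimm(offset, 7, 0x3)) {
    std::printf("c.lw x%d, %lld(x%d)\n", rd, (long long)offset, rs);
  } else {
    std::printf("lw   x%d, %lld(x%d)\n", rd, (long long)offset, rs);
  }
}

int main() {
  emit_lw(9, 8, 16, true);   // all constraints met       -> c.lw
  emit_lw(5, 8, 16, true);   // x5 is outside x8..x15     -> lw
  emit_lw(9, 8, 18, true);   // offset not 4-byte aligned -> lw
  return 0;
}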
@@ -3376,14 +3852,14 @@ index 000000000..82b825db7 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 000000000..d0ac7ef46 +index 0000000000..f60e0e38ae --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,169 @@ +@@ -0,0 +1,165 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -3409,8 +3885,6 @@ index 000000000..d0ac7ef46 +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allocation.hpp" -+ +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering @@ -3457,7 +3931,6 @@ index 000000000..d0ac7ef46 + ((u8)(((u4*)p)[0])); + + case 2: -+ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -3471,7 +3944,7 @@ index 000000000..d0ac7ef46 + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | -+ (u8)(p[0]); ++ ((u8)(p[0])); + } + } + @@ -3516,7 +3989,6 @@ index 000000000..d0ac7ef46 + break; + + case 2: -+ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -3546,17 +4018,17 @@ index 000000000..d0ac7ef46 + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + -+#include OS_CPU_HEADER_INLINE(bytes) ++#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 000000000..522eedd29 +index 0000000000..12980c12de --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,352 @@ +@@ -0,0 +1,339 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
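The Bytes::get_native_u8 hunk above handles possibly misaligned reads by dispatching on the low bits of the address and assembling the value from naturally aligned narrower loads, since unaligned accesses may be slow or unsupported on RISC-V hardware. Below is a standalone sketch of the same idea, assuming a little-endian target (as on RISC-V); the function name is hypothetical and, like the HotSpot code it mirrors, it relies on the compiler tolerating the type-punning casts.

#include <cassert>
#include <cstdint>
#include <cstring>

// Read a 64-bit value from a possibly misaligned address by dispatching on the
// address's low bits and combining naturally aligned narrower loads (little-endian).
static uint64_t read_u8_unaligned(const unsigned char* p) {
  switch (reinterpret_cast<uintptr_t>(p) & 7) {
    case 0:    // 8-byte aligned: one 64-bit load
      return *reinterpret_cast<const uint64_t*>(p);
    case 4:    // 4-byte aligned: two 32-bit loads
      return (uint64_t(reinterpret_cast<const uint32_t*>(p)[1]) << 32) |
              uint64_t(reinterpret_cast<const uint32_t*>(p)[0]);
    case 2:    // 2-byte aligned: four 16-bit loads
      return (uint64_t(reinterpret_cast<const uint16_t*>(p)[3]) << 48) |
             (uint64_t(reinterpret_cast<const uint16_t*>(p)[2]) << 32) |
             (uint64_t(reinterpret_cast<const uint16_t*>(p)[1]) << 16) |
              uint64_t(reinterpret_cast<const uint16_t*>(p)[0]);
    default: { // any other residue: plain byte loads
      uint64_t v = 0;
      for (int i = 7; i >= 0; --i) {
        v = (v << 8) | p[i];
      }
      return v;
    }
  }
}

int main() {
  alignas(8) unsigned char buf[16] = {0};
  const uint64_t x = 0x1122334455667788ULL;
  std::memcpy(buf + 3, &x, sizeof(x));       // deliberately misaligned store
  assert(read_u8_unaligned(buf + 3) == x);   // odd address -> byte loads
  std::memcpy(buf + 4, &x, sizeof(x));
  assert(read_u8_unaligned(buf + 4) == x);   // 4-aligned   -> two 32-bit loads
  return 0;
}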
@@ -3588,6 +4060,7 @@ index 000000000..522eedd29 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" @@ -3595,8 +4068,7 @@ index 000000000..522eedd29 + +#define __ ce->masm()-> + -+void CounterOverflowStub::emit_code(LIR_Assembler* ce) -+{ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); @@ -3608,22 +4080,19 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) -+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -+{ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -+{ ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + -+void RangeCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -3655,13 +4124,11 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -+{ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + -+void PredicateFailedStub::emit_code(LIR_Assembler* ce) -+{ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); @@ -3670,8 +4137,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void DivByZeroStub::emit_code(LIR_Assembler* ce) -+{ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } @@ -3685,21 +4151,19 @@ index 000000000..522eedd29 +} + +// Implementation of NewInstanceStub -+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -+{ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); -+ assert(stub_id == Runtime1::new_instance_id || -+ stub_id == Runtime1::fast_new_instance_id || ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + -+void NewInstanceStub::emit_code(LIR_Assembler* ce) -+{ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); @@ -3711,16 +4175,14 @@ index 000000000..522eedd29 +} + +// Implementation 
of NewTypeArrayStub -+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + -+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3733,16 +4195,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewObjectArrayStub -+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + -+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3756,13 +4216,11 @@ index 000000000..522eedd29 + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -+: MonitorAccessStub(obj_reg, lock_reg) -+{ ++: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + -+void MonitorEnterStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); @@ -3779,8 +4237,7 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+void MonitorExitStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -3798,18 +4255,23 @@ index 000000000..522eedd29 + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + -+// RISCV don't use C1 runtime patching. When need patch, just deoptimize. 
-+void PatchingStub::emit_code(LIR_Assembler* ce) -+{ ++void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + -+void DeoptimizeStub::emit_code(LIR_Assembler* ce) -+{ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -3817,8 +4279,7 @@ index 000000000..522eedd29 + DEBUG_ONLY(__ should_not_reach_here()); +} + -+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. @@ -3835,8 +4296,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -+{ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); @@ -3845,19 +4305,17 @@ index 000000000..522eedd29 + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } -+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + -+void ArrayCopyStub::emit_code(LIR_Assembler* ce) -+{ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. -+ // + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; @@ -3865,12 +4323,11 @@ index 000000000..522eedd29 + + // push parameters + Register r[args_num]; -+ int i = 0; -+ r[i++] = src()->as_register(); -+ r[i++] = src_pos()->as_register(); -+ r[i++] = dst()->as_register(); -+ r[i++] = dst_pos()->as_register(); -+ r[i++] = length()->as_register(); ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { @@ -3879,7 +4336,7 @@ index 000000000..522eedd29 + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { -+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + @@ -3899,8 +4356,10 @@ index 000000000..522eedd29 + ce->add_call_info_here(info()); + +#ifndef PRODUCT -+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ incrementw(Address(t1)); ++ if (PrintC1Statistics) { ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ add_memory_int32(Address(t1), 1); ++ } +#endif + + __ j(_continuation); @@ -3909,13 +4368,12 @@ index 000000000..522eedd29 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 000000000..a0f411352 +index 0000000000..4417ad6309 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,84 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its 
affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -3973,7 +4431,7 @@ index 000000000..a0f411352 + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan -+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, @@ -4000,13 +4458,12 @@ index 000000000..a0f411352 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 000000000..d4876625c +index 0000000000..e3a2606c53 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4037,13 +4494,12 @@ index 000000000..d4876625c +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 000000000..4b43bc4d7 +index 0000000000..7bc3d31150 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4076,13 +4532,12 @@ index 000000000..4b43bc4d7 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 000000000..94b4e0f0b +index 0000000000..682ebe8262 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -0,0 +1,391 @@ +@@ -0,0 +1,388 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4112,8 +4567,7 @@ index 000000000..94b4e0f0b +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + -+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -+{ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); @@ -4129,7 +4583,7 @@ index 000000000..94b4e0f0b + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); -+ } else if (type == T_OBJECT || type == T_ARRAY) { ++ } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); @@ -4413,7 +4867,7 @@ index 000000000..94b4e0f0b + + +// ----------------mapping----------------------- -+// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + @@ -4430,7 +4884,7 @@ index 000000000..94b4e0f0b +// | .........| <- TOS +// | locals | +// +----------+ -+// | old fp, | ++// | old fp, | +// +----------+ +// | ret addr | +// +----------+ @@ -4458,8 +4912,7 @@ index 000000000..94b4e0f0b + return as_FloatRegister(n)->as_VMReg(); +} + -+LIR_Opr FrameMap::stack_pointer() -+{ ++LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + @@ -4473,13 +4926,12 @@ index 000000000..94b4e0f0b +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 000000000..f600c2f6f +index 0000000000..01281f5c9e --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,148 @@ +/* -+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4628,13 +5080,12 @@ index 000000000..f600c2f6f +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 000000000..a846d60ae +index 0000000000..2a99d49c94 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,287 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4671,16 +5122,15 @@ index 000000000..a846d60ae + +#define __ _masm-> + -+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { -+ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, ++ LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); -+ -+ // operand check -+ assert(left->is_single_cpu(), "left must be register"); -+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); -+ assert(result->is_single_cpu(), "result must be register"); ++ // opreand check ++ assert(left->is_single_cpu(), "left must be a register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); ++ assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + @@ -4754,7 +5204,7 @@ index 000000000..a846d60ae + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { @@ -4762,7 +5212,7 @@ index 000000000..a846d60ae + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + default: + ShouldNotReachHere(); + } @@ -4817,7 +5267,7 @@ index 000000000..a846d60ae + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { -+ case lir_add: ++ case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); @@ -4826,12 +5276,12 @@ index 000000000..a846d60ae + code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { @@ -4844,12 +5294,12 @@ index 000000000..a846d60ae + } + break; + case lir_rem: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); @@ -4874,9 +5324,9 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -4889,9 +5339,9 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -4921,13 +5371,12 @@ index 000000000..a846d60ae +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 000000000..93530ef58 +index 0000000000..ab0a9963fc --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp -@@ -0,0 +1,36 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4950,6 +5399,7 @@ index 000000000..93530ef58 + * questions. 
+ * + */ ++ +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + @@ -4960,17 +5410,17 @@ index 000000000..93530ef58 + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 000000000..31f8d6a4a +index 0000000000..b7f53e395f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp -@@ -0,0 +1,387 @@ +@@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -4999,6 +5449,7 @@ index 000000000..31f8d6a4a +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -5026,7 +5477,7 @@ index 000000000..31f8d6a4a + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5064,14 +5515,14 @@ index 000000000..31f8d6a4a + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5133,7 +5584,7 @@ index 000000000..31f8d6a4a + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif @@ -5142,7 +5593,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -5214,6 +5665,7 @@ index 000000000..31f8d6a4a +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); ++ + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || 
!(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the @@ -5269,7 +5721,7 @@ index 000000000..31f8d6a4a + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; -+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { @@ -5292,7 +5744,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -5356,13 +5808,12 @@ index 000000000..31f8d6a4a +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 000000000..872fd2ef6 +index 0000000000..06a0f248ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -5388,6 +5839,7 @@ index 000000000..872fd2ef6 + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); @@ -5410,17 +5862,18 @@ index 000000000..872fd2ef6 + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 000000000..222e3e97e +index 0000000000..1e482d7cc2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2275 @@ +@@ -0,0 +1,2268 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -5455,14 +5908,11 @@ index 000000000..222e3e97e +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -5512,7 +5962,6 @@ index 000000000..222e3e97e + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + -+ +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} @@ -5521,25 +5970,11 @@ index 000000000..222e3e97e + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + -+//--------------fpu register translations----------------------- -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -+//------------------------------------------- + +static jlong as_long(LIR_Opr data) { + jlong result; @@ -5557,6 +5992,43 @@ index 000000000..222e3e97e + return result; +} + ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_opr = addr->index(); ++ ++ if (index_opr->is_illegal()) { ++ return Address(base, addr->disp()); ++ } ++ ++ int scale = addr->scale(); ++ if (index_opr->is_cpu_register()) { ++ Register index; ++ if (index_opr->is_single_cpu()) { ++ index = index_opr->as_register(); ++ } else { ++ index = index_opr->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_opr->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); @@ -5572,7 +6044,7 @@ index 000000000..222e3e97e + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address -+// calculation to hold the address in a temporary register. ++// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); @@ -5690,10 +6162,7 @@ index 000000000..222e3e97e +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! 
+ -+ // The frame_map records size in slots (32bit word) -+ -+ // subtract two words to account for return address and link -+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { @@ -6007,7 +6476,7 @@ index 000000000..222e3e97e + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { -+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; @@ -6064,8 +6533,7 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ -6075,7 +6543,7 @@ index 000000000..222e3e97e + return; + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { @@ -6187,8 +6655,7 @@ index 000000000..222e3e97e + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool wide, bool /* unaligned */) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -6233,6 +6700,9 @@ index 000000000..222e3e97e + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. 
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { @@ -6261,7 +6731,7 @@ index 000000000..222e3e97e + ShouldNotReachHere(); + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } @@ -6275,7 +6745,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { -+ case lir_idiv: ++ case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), @@ -6431,8 +6901,8 @@ index 000000000..222e3e97e + Register len = op->len()->as_register(); + + if (UseSlowPath || -+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || -+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); @@ -6467,7 +6937,7 @@ index 000000000..222e3e97e + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -6479,7 +6949,7 @@ index 000000000..222e3e97e + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ mv(t1, DataLayout::counter_increment); ++ __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -6505,7 +6975,7 @@ index 000000000..222e3e97e + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit -+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne(k_RInfo, t0, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump @@ -6550,10 +7020,7 @@ index 000000000..222e3e97e + // Object is null, update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); -+ Address data_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ lbu(t0, data_addr); + __ ori(t0, t0, BitData::null_seen_byte_constant()); + __ sb(t0, data_addr); @@ -6758,7 +7225,12 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::align_call(LIR_Code code) { } ++void LIR_Assembler::align_call(LIR_Code code) { ++ // With RVC a call instruction may get 2-byte aligned. ++ // The address of the call instruction needs to be 4-byte aligned to ++ // ensure that it does not span a cache line so that it can be patched. 
++ __ align(4); ++} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -6778,10 +7250,14 @@ index 000000000..222e3e97e + add_call_info(code_offset(), op->info()); +} + -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); ++ assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -6793,7 +7269,8 @@ index 000000000..222e3e97e + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + -+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + @@ -6838,7 +7315,6 @@ index 000000000..222e3e97e + __ j(_unwind_handler_entry); +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6866,7 +7342,6 @@ index 000000000..222e3e97e + } +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6901,8 +7376,6 @@ index 000000000..222e3e97e + } +} + -+ -+ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); @@ -6962,7 +7435,7 @@ index 000000000..222e3e97e + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6978,7 +7451,7 @@ index 000000000..222e3e97e + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6988,13 +7461,13 @@ index 000000000..222e3e97e + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + @@ -7029,7 +7502,7 @@ index 000000000..222e3e97e + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7079,7 +7552,7 @@ index 000000000..222e3e97e + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -7205,26 +7678,27 @@ index 000000000..222e3e97e + return; + } +#endif ++ + assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); -+ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { -+ ++ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { ++ int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); -+ int scale = adr->scale(); -+ if(index_op->is_constant()) { ++ if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + -+ if(!is_imm_in_range(offset, 12, 0)) { ++ if (!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } ++ + __ la(dst, as_Address(adr)); +} + @@ -7248,8 +7722,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, -+ /* unaligned */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -7326,7 +7799,7 @@ index 000000000..222e3e97e +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + + get_op(type); + @@ -7376,41 +7849,6 @@ index 000000000..222e3e97e + return exact_log2(elem_size); +} + -+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { -+ if (addr->base()->is_illegal()) { -+ assert(addr->index()->is_illegal(), "must be illegal too"); -+ __ movptr(tmp, addr->disp()); -+ return Address(tmp, 0); -+ } -+ -+ Register base = addr->base()->as_pointer_register(); -+ LIR_Opr index_op = addr->index(); -+ int scale = addr->scale(); -+ -+ if (index_op->is_illegal()) { -+ return Address(base, addr->disp()); -+ } else if (index_op->is_cpu_register()) { -+ Register index; -+ if (index_op->is_single_cpu()) { -+ index = index_op->as_register(); -+ } else { -+ index = index_op->as_register_lo(); -+ } -+ if (scale != 0) { -+ __ shadd(tmp, index, base, tmp, 
scale); -+ } else { -+ __ add(tmp, base, index); -+ } -+ return Address(tmp, addr->disp()); -+ } else if (index_op->is_constant()) { -+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); -+ return Address(base, addr_offset); -+ } -+ -+ Unimplemented(); -+ return Address(); -+} -+ +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. @@ -7444,15 +7882,17 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, @@ -7498,7 +7938,6 @@ index 000000000..222e3e97e + add_call_info_here(info); +} + -+ +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); @@ -7588,6 +8027,16 @@ index 000000000..222e3e97e + __ bind(done); +} + ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -7602,10 +8051,7 @@ index 000000000..222e3e97e + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); -+ Address counter_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); @@ -7687,21 +8133,21 @@ index 000000000..222e3e97e + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ mv(t0, c); ++ __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 000000000..11a47fd6e +index 
0000000000..5c81f1c704 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,133 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -7743,9 +8189,6 @@ index 000000000..11a47fd6e + + Address as_Address(LIR_Address* addr, Register tmp); + -+ // Ensure we have a valid Address (base+offset) to a stack-slot. -+ Address stack_slot_address(int index, uint shift, int adjust = 0); -+ + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. @@ -7753,6 +8196,9 @@ index 000000000..11a47fd6e + address double_constant(double d); + address int_constant(jlong n); + ++ // Ensure we have a valid Address (base + offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, @@ -7768,17 +8214,16 @@ index 000000000..11a47fd6e + + void deoptimize_trap(CodeEmitInfo *info); + -+ enum -+ { -+ // see emit_static_call_stub for detail: ++ enum { ++ // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), + _call_aot_stub_size = 0, -+ // see emit_exception_handler for detail: ++ // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -+ // see emit_deopt_handler for detail ++ // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller @@ -7789,10 +8234,12 @@ index 000000000..11a47fd6e + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ + void get_op(BasicType type); + + // emit_typecheck_helper sub functions @@ -7832,12 +8279,12 @@ index 000000000..11a47fd6e +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 000000000..8ba9ed66d +index 0000000000..c41819fc2a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1083 @@ +@@ -0,0 +1,1094 @@ +/* -+ * Copyright (c) 2005, 2019, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -7980,7 +8427,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; @@ -7996,7 +8442,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} @@ -8004,7 +8449,7 @@ index 000000000..8ba9ed66d +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); -+ ++ + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; @@ -8031,17 +8476,23 @@ index 000000000..8ba9ed66d + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); -+ + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; + switch (type) { -+ case T_LONG: return LIR_OprFact::longConst(x); -+ case T_INT: return LIR_OprFact::intConst(x); -+ default: ShouldNotReachHere(); ++ case T_LONG: ++ r = LIR_OprFact::longConst(x); ++ break; ++ case T_INT: ++ r = LIR_OprFact::intConst(x); ++ break; ++ default: ++ ShouldNotReachHere(); ++ r = NULL; + } -+ return NULL; ++ return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -8111,10 +8562,10 @@ index 000000000..8ba9ed66d + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); -+ // Need a tmp register for biased locking -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { -+ tmp = new_register(T_INT); ++ scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; @@ -8124,7 +8575,7 @@ index 000000000..8ba9ed66d + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + @@ -8208,7 +8659,7 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap -+ LIRItem left(x->x(), this); ++ LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -8221,7 +8672,7 @@ index 000000000..8ba9ed66d + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant -+ if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); @@ -8232,7 +8683,7 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), 
LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -8306,16 +8757,16 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; -+ + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } ++ + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || @@ -8389,7 +8840,7 @@ index 000000000..8ba9ed66d + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); -+ if(right.is_constant() && ++ if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); @@ -8438,7 +8889,7 @@ index 000000000..8ba9ed66d + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); -+ if (type == T_OBJECT || type == T_ARRAY) { ++ if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); @@ -8452,7 +8903,7 @@ index 000000000..8ba9ed66d +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); @@ -8513,19 +8964,30 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } @@ -8913,20 +9375,16 @@ index 000000000..8ba9ed66d + 
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { -+ if (!UseBarriersForVolatile) { -+ __ membar(); -+ } -+ + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 000000000..00e33e882 +index 0000000000..0317ed9003 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -8976,18 +9434,18 @@ index 000000000..00e33e882 +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); -+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, -+ "wrong type for addresses"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || ++ base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 000000000..60dcdc0e1 +index 0000000000..78a61128bd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -9021,14 +9479,14 @@ index 000000000..60dcdc0e1 +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 000000000..f0aa08a39 +index 0000000000..d7ca7b0fd0 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9063,7 +9521,6 @@ index 000000000..f0aa08a39 + return 1; +} + -+ +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} @@ -9085,8 +9542,8 @@ index 000000000..f0aa08a39 + return false; +} + -+ +inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // No special case behaviours yet +} + + @@ -9099,8 +9556,8 @@ index 000000000..f0aa08a39 + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; -+ } else if (cur->type() == T_INT || cur->type() == T_LONG || -+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; @@ -9108,18 +9565,17 @@ index 000000000..f0aa08a39 + return false; +} + -+ +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..370ec45c6 +index 0000000000..99d981f97f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,441 @@ +@@ -0,0 +1,443 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9143,13 +9599,14 @@ index 000000000..370ec45c6 + */ + +#include "precompiled.hpp" ++#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" @@ -9167,7 +9624,7 @@ index 000000000..370ec45c6 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9180,8 +9637,8 @@ index 000000000..370ec45c6 + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { -+ assert(tmp != noreg, "should have tmp register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } @@ -9212,7 +9669,7 @@ index 000000000..370ec45c6 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ mv(t0, aligned_mask - os::vm_page_size()); ++ li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9222,7 +9679,7 @@ index 000000000..370ec45c6 + bind(done); + if (PrintBiasedLockingStatistics) { + la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ incrementw(Address(t1, 0)); ++ add_memory_int32(Address(t1, 0), 1); + } + return null_check_offset; +} @@ -9298,7 +9755,7 @@ index 000000000..370ec45c6 +} + +// preserves obj, destroys len_in_bytes -+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + @@ -9310,7 +9767,7 @@ index 000000000..370ec45c6 + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } -+ zero_memory(obj, len_in_bytes, tmp1); ++ zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } @@ -9435,19 +9892,20 @@ index 000000000..370ec45c6 + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we ++ // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + // Make it a NOP. 
+ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. -+ // Note that we do this before doing an enter(). ++ // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); -+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multipler for wordSize ++ MacroAssembler::build_frame(framesize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { -+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiper for wordSize ++ MacroAssembler::remove_frame(framesize); +} + + @@ -9539,9 +9997,9 @@ index 000000000..370ec45c6 + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { -+ oop_equal(op1, op2, label, is_far); ++ beq(op1, op2, label, is_far); + } else { -+ oop_nequal(op1, op2, label, is_far); ++ bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), @@ -9559,14 +10017,14 @@ index 000000000..370ec45c6 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5d0cefe89 +index 0000000000..1950cee5dd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -0,0 +1,121 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9614,7 +10072,7 @@ index 000000000..5d0cefe89 + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); -+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, @@ -9624,9 +10082,9 @@ index 000000000..5d0cefe89 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved -+ // tmp : temporary register, contents destroyed ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -9686,14 +10144,14 @@ index 000000000..5d0cefe89 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 000000000..f06e7b51c +index 0000000000..329df2e1ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1206 @@ +@@ -0,0 +1,1210 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9723,9 +10181,11 @@ index 000000000..f06e7b51c +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" @@ -9733,6 +10193,7 @@ index 000000000..f06e7b51c +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_riscv.inline.hpp" @@ -9740,11 +10201,11 @@ index 000000000..f06e7b51c + +// Implementation of StubAssembler + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers -+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); -+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + @@ -9780,7 +10241,7 @@ index 000000000..f06e7b51c + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared -+ if (oop_result1->is_valid()) { ++ if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { @@ -9797,8 +10258,8 @@ index 000000000..f06e7b51c + bind(L); + } + // get oop results if there are any and reset the values in the thread -+ if (oop_result1->is_valid()) { -+ get_vm_result(oop_result1, xthread); ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); @@ -9806,12 +10267,12 @@ index 000000000..f06e7b51c + return call_offset; +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); -+ return call_RT(oop_result1, metadata_result, entry, 1); ++ return call_RT(oop_result, metadata_result, entry, 1); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { @@ -9826,10 +10287,10 @@ index 000000000..f06e7b51c + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++int StubAssembler::call_RT(Register 
oop_result, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || @@ -9838,21 +10299,21 @@ index 000000000..f06e7b51c + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; -+ addi(sp, sp, -(arg_num * wordSize)); -+ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); -+ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); -+ addi(sp, sp, arg_num * wordSize); ++ addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + +// Implementation of StubFrame @@ -9919,7 +10380,7 @@ index 000000000..f06e7b51c +}; + +// Save off registers which might be killed by calls into the runtime. -+// Tries to smart of about FP registers. In particular we separate ++// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. @@ -9936,11 +10397,12 @@ index 000000000..f06e7b51c + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + -+ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. -+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, -+ x13, x14, x15, x16, x17, -+ x28, x29, x30, x31}; ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { ++ x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 ++ }; ++ + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -10165,7 +10627,7 @@ index 000000000..f06e7b51c + __ leave(); + __ ret(); // jump to exception handler + break; -+ default: ShouldNotReachHere(); ++ default: ShouldNotReachHere(); + } + + return oop_maps; @@ -10269,14 +10731,13 @@ index 000000000..f06e7b51c + __ reset_last_Java_frame(true); + + // check for pending exceptions -+ { -+ Label L; ++ { Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, L); + // exception pending => remove activation and forward to exception handler + + { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? ++ __ bnez(x10, L1); // have we deoptimized? 
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ bind(L1); + } @@ -10295,7 +10756,7 @@ index 000000000..f06e7b51c + __ ld(x13, Address(fp, wordSize)); + +#ifdef ASSERT -+ // check that fields in JavaThread for exception oop and issuing pc are empty ++ // Check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, oop_empty); @@ -10334,6 +10795,7 @@ index 000000000..f06e7b51c + + // Will reexecute. Proper return address is already on the stack we just restore + // registers, pop all of our frame but the return address and jump to the deopt blob ++ + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); @@ -10693,7 +11155,7 @@ index 000000000..f06e7b51c + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -10883,7 +11345,7 @@ index 000000000..f06e7b51c + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); -+ __ mv(x10, (int)id); ++ __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -10898,14 +11360,13 @@ index 000000000..f06e7b51c +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 000000000..974c8fe76 +index 0000000000..9316d4be02 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,71 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -10962,7 +11423,7 @@ index 000000000..974c8fe76 +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); @@ -10971,19 +11432,18 @@ index 000000000..974c8fe76 +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); -+define_pd_global(bool, TwoOperandLIRForm, false ); ++define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 000000000..bf4efa629 +index 0000000000..3da1f1c6d8 --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,91 @@ +@@ -0,0 +1,90 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11029,7 +11489,7 @@ index 000000000..bf4efa629 + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 64); ++define_pd_global(intx, FLOATPRESSURE, 32); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 24); @@ -11059,7 +11519,7 @@ index 000000000..bf4efa629 +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +// Heap related flags @@ -11068,18 +11528,18 @@ index 000000000..bf4efa629 +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + +#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp new file mode 100644 -index 000000000..3cb4a4995 +index 0000000000..cdbd69807b --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11117,12 +11577,12 @@ index 000000000..3cb4a4995 +} diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp new file mode 100644 -index 000000000..881900892 +index 0000000000..14a68b4502 --- /dev/null +++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp @@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -11159,14 +11619,14 @@ index 000000000..881900892 +#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp new file mode 100644 -index 000000000..0354a93a0 +index 0000000000..a4de342a93 --- /dev/null +++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,154 @@ +@@ -0,0 +1,149 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. 
All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11239,8 +11699,8 @@ index 000000000..0354a93a0 +} + +int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count four instructions here (although -+ // there are only three) because we sometimes emit an alignment nop. ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. + // Trampoline stubs are always word aligned. + return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; +} @@ -11262,8 +11722,7 @@ index 000000000..0354a93a0 + } + + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); +#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + @@ -11288,8 +11747,7 @@ index 000000000..0354a93a0 + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + method_holder->set_data(0); +} + @@ -11300,16 +11758,13 @@ index 000000000..0354a93a0 +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); -+ if (os::is_MP()) { -+ _call->verify_alignment(); -+ } ++ _call->verify_alignment(); + + // Verify stub. + address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. @@ -11319,14 +11774,14 @@ index 000000000..0354a93a0 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp new file mode 100644 -index 000000000..011e965ad +index 0000000000..05da242e35 --- /dev/null +++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,59 @@ +/* -+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -11357,7 +11812,6 @@ index 000000000..011e965ad +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + -+ +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; @@ -11385,7 +11839,7 @@ index 000000000..011e965ad +#endif // CPU_RISCV_COPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp new file mode 100644 -index 000000000..31cee7103 +index 0000000000..e9ff307b64 --- /dev/null +++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp @@ -0,0 +1,32 @@ @@ -11423,14 +11877,14 @@ index 000000000..31cee7103 +#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp new file mode 100644 -index 000000000..e97b89327 +index 0000000000..06bca5298c --- /dev/null +++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,37 @@ +@@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11444,7 +11898,8 @@ index 000000000..e97b89327 + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any @@ -11455,24 +11910,24 @@ index 000000000..e97b89327 +#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP +#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+ static int pd_instruction_alignment() { -+ return 1; -+ } ++static int pd_instruction_alignment() { ++ return 1; ++} + -+ static const char* pd_cpu_opts() { -+ return ""; -+ } ++static const char* pd_cpu_opts() { ++ return ""; ++} + +#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp new file mode 100644 -index 000000000..be6f1a67f +index 0000000000..d4fcbdcbbd --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,683 @@ +@@ -0,0 +1,694 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -11497,9 +11952,10 @@ index 000000000..be6f1a67f + */ + +#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" -+#include "oops/markOop.hpp" ++#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" @@ -11507,7 +11963,7 @@ index 000000000..be6f1a67f +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" -+#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" @@ -11526,19 +11982,19 @@ index 000000000..be6f1a67f +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; ++ address sp = (address)_sp; ++ address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ assert_cond(thread != NULL); ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) -+ bool sp_safe = (addr_sp < thread->stack_base()) && -+ (addr_sp >= thread->stack_base() - usable_stack_size); ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ + + if (!sp_safe) { + return false; @@ -11565,8 +12021,7 @@ index 000000000..be6f1a67f + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && -+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + @@ -11630,9 +12085,10 @@ index 000000000..be6f1a67f + if ((address)sender_sp >= thread->stack_base()) { + return false; + } ++ + sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp + frame::return_addr_offset); -+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); + } + + @@ -11642,6 +12098,7 @@ index 000000000..be6f1a67f + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
++ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { @@ -11996,13 +12453,12 @@ index 000000000..be6f1a67f + // do some validation of frame elements + + // first the method -+ + Method* m = *interpreter_frame_method_addr(); -+ + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) { + return false; + } ++ + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point @@ -12013,7 +12469,7 @@ index 000000000..be6f1a67f + } + + // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); ++ address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } @@ -12023,12 +12479,22 @@ index 000000000..be6f1a67f + if (MetaspaceObj::is_valid(cp) == false) { + return false; + } ++ + // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base()) { ++ return false; ++ } + -+ if (locals > thread->stack_base() || locals < (address) fp()) { ++ if (m->max_locals() > 0 && locals < (address) fp()) { ++ // fp in interpreter frame on RISC-V is higher than that on AArch64, ++ // pointing to sender_sp and sender_sp-2 relatively. ++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, ++ // pointing to sender_sp-1 (with one padding slot). ++ // So we verify the 'locals' pointer only if max_locals > 0. + return false; + } ++ + // We'd have to be pretty unlucky to be mislead at this point + return true; +} @@ -12155,14 +12621,13 @@ index 000000000..be6f1a67f +} diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 000000000..7acabcbba +index 0000000000..18e021dcb9 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,200 @@ +@@ -0,0 +1,199 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12212,7 +12677,7 @@ index 000000000..7acabcbba +// [padding ] + +// [methodData ] = mdp() mdx_offset -+// [methodOop ] = method() method_offset ++// [Method ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset @@ -12354,19 +12819,19 @@ index 000000000..7acabcbba + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_RISCV_FRAME_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 000000000..5bc6b430c +index 0000000000..abd5bda7e4 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,257 @@ +@@ -0,0 +1,245 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -12504,11 +12969,6 @@ index 000000000..5bc6b430c +// frame. +inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+// Relationals on frames based -+ -+// Return true if the frame is younger (more recent activation) than the frame represented by id -+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() < id ; } +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } @@ -12604,33 +13064,26 @@ index 000000000..5bc6b430c +// Compiled frames +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ return (*result_adr); -+ } else { -+ ShouldNotReachHere(); -+ return NULL; -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ *result_adr = obj; -+ } else { -+ ShouldNotReachHere(); -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; +} + +#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..6f778956d +index 0000000000..e191cbcee2 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,479 @@ +@@ -0,0 +1,481 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12675,6 +13128,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if (!dest_uninitialized) { + Label done; @@ -12717,6 +13171,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + __ push_reg(saved_regs, sp); + assert_different_registers(start, count, tmp); + assert_different_registers(c_rarg0, count); @@ -12736,7 +13191,8 @@ index 000000000..6f778956d + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ ++ ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; @@ -12784,21 +13240,15 @@ index 000000000..6f778956d + __ j(done); + + __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } -+ -+ __ push_reg(saved, sp); + ++ __ push_call_clobbered_registers(); + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } -+ -+ __ pop_reg(saved, sp); ++ __ pop_call_clobbered_registers(); + + __ bind(done); + @@ -12810,6 +13260,7 @@ index 000000000..6f778956d + Register thread, + Register tmp, + Register tmp2) { ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + t0); @@ -12839,6 +13290,7 @@ index 000000000..6f778956d + + // storing region crossing non-NULL, is card already dirty? 
+ ++ ExternalAddress cardtable((address) ct->byte_map_base()); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + @@ -12885,7 +13337,8 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { -+ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; @@ -12907,16 +13360,19 @@ index 000000000..6f778956d +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + // flatten object address if needed + if (dst.offset() == 0) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + g1_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12924,7 +13380,7 @@ index 000000000..6f778956d + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; @@ -12932,9 +13388,9 @@ index 000000000..6f778956d + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + g1_write_barrier_post(masm, -+ tmp3 /* store_adr */, ++ x13 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12961,8 +13417,7 @@ index 000000000..6f778956d + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), -+ false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -13109,13 +13564,13 @@ index 000000000..6f778956d +#endif // COMPILER1 diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..7f85e002d +index 0000000000..37bc183f39 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13175,7 +13630,7 @@ index 000000000..7f85e002d + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 @@ -13191,15 +13646,52 @@ index 000000000..7f85e002d +}; + +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +new file mode 100644 +index 0000000000..8735fd014f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++ ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..203b82744 +index 0000000000..2b556b95d7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,226 @@ +@@ -0,0 +1,231 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13223,15 +13715,23 @@ index 000000000..203b82744 + */ + +#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ + // RA is live. It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; @@ -13271,7 +13771,8 @@ index 000000000..203b82744 +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { @@ -13311,16 +13812,9 @@ index 000000000..203b82744 + +} + -+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { -+ __ beq(obj1, obj2, equal, is_far); -+} -+ -+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { -+ __ bne(obj1, obj2, nequal, is_far); -+} -+ +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andi(obj, obj, ~JNIHandles::weak_tag_mask); @@ -13335,6 +13829,7 @@ index 000000000..203b82744 + Register tmp2, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp2); + assert_different_registers(obj, var_size_in_bytes); + Register end = tmp2; @@ -13364,6 +13859,7 @@ index 000000000..203b82744 + Register tmp1, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, var_size_in_bytes, tmp1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ j(slow_case); @@ -13404,7 +13900,7 @@ index 000000000..203b82744 + // If heap_top hasn't been changed by some other thread, update it. 
+ __ sc_d(t1, end, t0, Assembler::rl); + __ bnez(t1, retry); -+ ++ + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} @@ -13413,6 +13909,7 @@ index 000000000..203b82744 + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1) { ++ assert_cond(masm != NULL); + assert(tmp1->is_valid(), "need temp reg"); + + __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); @@ -13425,13 +13922,13 @@ index 000000000..203b82744 +} diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..964fc28be +index 0000000000..984d94f4c3 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,75 @@ +@@ -0,0 +1,76 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13458,6 +13955,7 @@ index 000000000..964fc28be +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + @@ -13475,9 +13973,8 @@ index 000000000..964fc28be + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false); -+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false); ++ Address dst, Register val, Register tmp1, Register tmp2); ++ + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + @@ -13488,7 +13985,7 @@ index 000000000..964fc28be + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 ++ bool is_far = false + ); + + void eden_allocate(MacroAssembler* masm, @@ -13497,22 +13994,23 @@ index 000000000..964fc28be + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 ++ bool is_far = false + ); + virtual void barrier_stubs_init() {} ++ + virtual ~BarrierSetAssembler() {} +}; + +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1720488fb +index 0000000000..81d47d61d4 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,125 @@ +/* -+ * 
Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13541,12 +14039,14 @@ index 000000000..1720488fb +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); @@ -13579,8 +14079,10 @@ index 000000000..1720488fb + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); ++ + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); @@ -13612,33 +14114,34 @@ index 000000000..1720488fb +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), tmp3); ++ store_check(masm, dst.base(), x13); + } else { -+ __ la(tmp3, dst); -+ store_check(masm, tmp3, t0); ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); + } + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..a5b3f9fe8 +index 0000000000..686fe8fa47 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13674,20 +14177,19 @@ index 000000000..a5b3f9fe8 + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..b82275297 +index 0000000000..7aa2015f9e --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,54 @@ +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13718,6 +14220,7 @@ index 000000000..b82275297 + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { ++ + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } @@ -13732,22 +14235,22 @@ index 000000000..b82275297 +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ if (type == T_OBJECT || type == T_ARRAY) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..df206cc87 +index 0000000000..00419c3163 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13788,7 +14291,7 @@ index 000000000..df206cc87 + Register start, Register count, Register tmp, RegSet saved_regs) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, @@ -13796,18 +14299,18 @@ index 000000000..df206cc87 + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp new file mode 100644 -index 000000000..6657f1be0 +index 0000000000..d19f5b859c --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,124 @@ +@@ -0,0 +1,117 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13834,6 +14337,7 @@ index 000000000..6657f1be0 +#include "precompiled.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" @@ -13859,14 +14363,6 @@ index 000000000..6657f1be0 + + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, + /* release */ Assembler::rl, /* is_cae */ false, result); -+ if (UseBarriersForVolatile) { -+ // The membar here is necessary to prevent reordering between the -+ // release store in the CAS above and a subsequent volatile load. -+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before -+ // volatile loads which means we don't need an additional barrier -+ // here (see LIRGenerator::volatile_field_load()). -+ __ membar(MacroAssembler::AnyAny); -+ } +} + +#undef __ @@ -13932,13 +14428,13 @@ index 000000000..6657f1be0 +} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1bc01e454 +index 0000000000..b8534c52e7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,743 @@ +@@ -0,0 +1,715 @@ +/* + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13988,8 +14484,8 @@ index 000000000..1bc01e454 + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || -+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ + Label done; + + // Avoid calling runtime if count == 0 @@ -14056,10 +14552,10 @@ index 000000000..1bc01e454 + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); @@ -14139,7 +14635,7 @@ index 000000000..1bc01e454 + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst -+ RegSet savedRegs = RegSet::of(t2); ++ RegSet saved_regs = RegSet::of(t2); + bool borrow_reg = (tmp == noreg); + if (borrow_reg) { + // No free registers available. Make one useful. @@ -14147,11 +14643,11 @@ index 000000000..1bc01e454 + if (tmp == dst) { + tmp = t1; + } -+ savedRegs += RegSet::of(tmp); ++ saved_regs += RegSet::of(tmp); + } + + assert_different_registers(tmp, dst, t2); -+ __ push_reg(savedRegs, sp); ++ __ push_reg(saved_regs, sp); + + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); @@ -14162,14 +14658,15 @@ index 000000000..1bc01e454 + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + -+ __ pop_reg(savedRegs, sp); ++ __ pop_reg(saved_regs, sp); +} + +void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, Address load_addr) { ++ Register dst, ++ Address load_addr) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t1, t2); ++ assert_different_registers(load_addr.base(), t0, t1); + + Label done; + __ enter(); @@ -14188,15 +14685,15 @@ index 000000000..1bc01e454 + } + + // Save x10 and x11, unless it is an output register -+ RegSet to_save = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(to_save, sp); ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); + __ la(x11, load_addr); + __ mv(x10, dst); + + __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + + __ mv(result_dst, x10); -+ __ pop_reg(to_save, sp); ++ __ pop_reg(saved_regs, sp); + + __ bind(done); + __ leave(); @@ -14205,7 +14702,9 @@ index 000000000..1bc01e454 +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { + if (ShenandoahIUBarrier) { + __ push_call_clobbered_registers(); ++ + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ + __ pop_call_clobbered_registers(); + } +} @@ -14249,16 +14748,14 @@ index 000000000..1bc01e454 + + // 2: load a reference from src location and apply LRB if needed + if 
(ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); -+ bool ist5 = (dst == src.base()); -+ if (ist5) { -+ __ push_reg(RegSet::of(x30), sp); -+ } + Register result_dst = dst; + + // Preserve src location for LRB ++ RegSet saved_regs; + if (dst == src.base()) { -+ dst = x30; ++ dst = (src.base() == x28) ? x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); + } + assert_different_registers(dst, src.base()); + @@ -14271,8 +14768,8 @@ index 000000000..1bc01e454 + dst = result_dst; + } + -+ if (ist5) { -+ __ pop_reg(RegSet::of(x30), sp); ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); + } + } else { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); @@ -14295,24 +14792,24 @@ index 000000000..1bc01e454 +} + +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool on_oop = is_reference_type(type); + if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + return; + } + + // flatten object address if needed + if (dst.offset() == 0) { -+ if (dst.base() != tmp3) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); + } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + shenandoah_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -14320,7 +14817,7 @@ index 000000000..1bc01e454 + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + iu_barrier(masm, val, tmp1); + // G1 barrier needs uncompressed oop for region cross check. @@ -14329,7 +14826,7 @@ index 000000000..1bc01e454 + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + } +} + @@ -14370,39 +14867,10 @@ index 000000000..1bc01e454 +// from-space, or it refers to the to-space version of an object that +// is being evacuated out of from-space. +// -+// By default, this operation implements sequential consistency and the -+// value held in the result register following execution of the -+// generated code sequence is 0 to indicate failure of CAS, non-zero -+// to indicate success. Arguments support variations on this theme: -+// -+// acquire: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If acquire, successful -+// execution has the side effect of assuring that memory -+// values updated by other threads and "released" will be -+// visible to any read operations perfomed by this thread -+// which follow this operation in program order. This is a -+// special optimization that should not be enabled by default. 
-+// release: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If release, successful -+// completion of this operation has the side effect of -+// assuring that all writes to memory performed by this -+// thread that precede this operation in program order are -+// visible to all other threads that subsequently "acquire" -+// before reading the respective memory values. This is a -+// special optimization that should not be enabled by default. -+// is_cae: This turns CAS (compare and swap) into CAE (compare and -+// exchange). This HotSpot convention is that CAE makes -+// available to the caller the "failure witness", which is -+// the value that was stored in memory which did not match -+// the expected value. If is_cae, the result is the value -+// most recently fetched from addr rather than a boolean -+// success indicator. ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. +// +// Clobbers t0, t1 +void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, @@ -14452,7 +14920,7 @@ index 000000000..1bc01e454 + if (is_cae) { + __ mv(result, expected); + } else { -+ __ mv(result, 1); ++ __ addi(result, zr, 1); + } + __ j(done); + @@ -14485,8 +14953,7 @@ index 000000000..1bc01e454 + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), -+ stub->info(), false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -14598,6 +15065,7 @@ index 000000000..1bc01e454 + __ push_call_clobbered_registers(); + __ load_parameter(0, x10); + __ load_parameter(1, x11); ++ + if (UseCompressedOops) { + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { @@ -14681,13 +15149,13 @@ index 000000000..1bc01e454 +} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..84bc55706 +index 0000000000..5d75035e9d --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,92 @@ +@@ -0,0 +1,97 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14715,6 +15183,7 @@ index 000000000..84bc55706 + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; +class ShenandoahPreBarrierStub; @@ -14724,31 +15193,6 @@ index 000000000..84bc55706 +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+public: -+ static address shenandoah_lrb(); -+ -+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); -+ -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -+#endif -+ -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); -+ +private: + + static address _shenandoah_lrb; @@ -14774,15 +15218,44 @@ index 000000000..84bc55706 + void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); + + address generate_shenandoah_lrb(StubCodeGenerator* cgen); ++ ++public: ++ ++ static address shenandoah_lrb(); ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); +}; + +#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad 
b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad new file mode 100644 -index 000000000..6e310697d +index 0000000000..bab407a8b7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,188 @@ +@@ -0,0 +1,197 @@ +// +// Copyright (c) 2018, Red Hat, Inc. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -14856,7 +15329,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14878,7 +15351,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14903,9 +15376,11 @@ index 000000000..6e310697d + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14913,6 +15388,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14924,6 +15400,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14931,6 +15408,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14943,6 +15421,7 @@ index 000000000..6e310697d + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14951,6 +15430,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14962,6 +15442,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
@@ -14969,18 +15450,19 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp new file mode 100644 -index 000000000..96068e637 +index 0000000000..d6ce8da07b --- /dev/null +++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,44 @@ +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15012,6 +15494,8 @@ index 000000000..96068e637 +// 32-bit integer argument values are extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORTS_NATIVE_CX8 @@ -15023,14 +15507,13 @@ index 000000000..96068e637 +#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp new file mode 100644 -index 000000000..b46661a8f +index 0000000000..90db2f4460 --- /dev/null +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,111 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15123,12 +15606,6 @@ index 000000000..b46661a8f + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ -+ product(bool, UseBarriersForVolatile, false, \ -+ "Use memory barriers to implement volatile accesses") \ -+ product(bool, UseCRC32, false, \ -+ "Use CRC32 instructions for CRC32 computation") \ -+ product(bool, UseBlockZeroing, true, \ -+ "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ @@ -15138,25 +15615,23 @@ index 000000000..b46661a8f + "Extend i for r and o for w in the pred/succ flags of fence") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ -+ product(intx, EagerArrayCopyThreshold, 128, \ -+ "Threshod of array length by bytes to " \ -+ "trigger the eager array copy") \ -+ range(0, 65535) \ + experimental(bool, UseRVV, false, "Use RVV instructions") \ + experimental(bool, UseZba, false, "Use Zba instructions") \ -+ experimental(bool, UseZbb, false, "Use Zbb instructions") ++ experimental(bool, UseZbb, false, "Use Zbb instructions") \ ++ experimental(bool, UseZbs, false, "Use Zbs instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + +#endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp new file mode 100644 -index 000000000..980b2a81b +index 0000000000..cc93103dc5 --- /dev/null +++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15191,7 +15666,7 @@ index 000000000..980b2a81b + +int InlineCacheBuffer::ic_stub_code_size() { + // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size ) ++ // 5: auipc + ld + j + address(2 * instruction_size) + return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + @@ -15234,13 +15709,14 @@ index 000000000..980b2a81b +} diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 000000000..ed8022784 +index 0000000000..d615dcfb9e --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,61 @@ +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15264,28 +15740,34 @@ index 000000000..ed8022784 + */ + +#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/java.hpp" +#include "runtime/icache.hpp" -+#include "macroAssembler_riscv.hpp" + +#define __ _masm-> + +static int icache_flush(address addr, int lines, int magic) { + // To make a store to instruction memory visible to all RISC-V harts, + // the writing hart has to execute a data FENCE before requesting that -+ // all remote RISC-V harts execute a FENCE.I -+ // -+ // No such-assurance is defined at the interface level of the builtin -+ // method, and so we should make sure it works. ++ // all remote RISC-V harts execute a FENCE.I. ++ ++ // We need to make sure stores happens before the I/D cache synchronization. + __asm__ volatile("fence rw, rw" : : : "memory"); -+ -+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); ++ ++ RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); ++ + return magic; +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ // Only riscv_flush_icache is supported as I-cache synchronization. ++ // We must make sure the VM can execute such without error. ++ if (!RiscvFlushIcache::test()) { ++ vm_exit_during_initialization("Unable to synchronize I-cache"); ++ } + + address start = (address)icache_flush; -+ + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first @@ -15301,12 +15783,12 @@ index 000000000..ed8022784 +#undef __ diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp new file mode 100644 -index 000000000..a503d3be3 +index 0000000000..5bf40ca820 --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -15349,14 +15831,14 @@ index 000000000..a503d3be3 +#endif // CPU_RISCV_ICACHE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 000000000..91deb0ae2 +index 0000000000..b50be7e726 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1932 @@ +@@ -0,0 +1,1931 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15388,7 +15870,6 @@ index 000000000..91deb0ae2 +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" @@ -15400,7 +15881,6 @@ index 000000000..91deb0ae2 +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + -+ +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); @@ -15618,7 +16098,8 @@ index 000000000..91deb0ae2 + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); + ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); @@ -15864,11 +16345,11 @@ index 000000000..91deb0ae2 + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } + if (table == Interpreter::dispatch_table(state)) { -+ mv(t1, Interpreter::distance_from_dispatch_table(state)); ++ li(t1, Interpreter::distance_from_dispatch_table(state)); + add(t1, Rs, t1); + shadd(t1, t1, xdispatch, t1, 3); + } else { @@ -16082,6 +16563,7 @@ index 000000000..91deb0ae2 + + // restore sender esp + mv(esp, t1); ++ + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be @@ -16160,7 +16642,7 @@ index 000000000..91deb0ae2 + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); -+ mv(t0, (int64_t)(7 - os::vm_page_size())); ++ li(t0, (int64_t)(7 - os::vm_page_size())); + andr(swap_reg, swap_reg, t0); + + // Save the test result, for recursive case, the result is zero @@ -16261,7 +16743,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; -+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++ push_reg(0xc00, sp); // save x10, x11 + + // Test MDO to avoid the call if it is NULL. 
+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); @@ -16274,7 +16756,7 @@ index 000000000..91deb0ae2 + add(x10, x11, x10); + sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + bind(set_mdp); -+ pop_reg(RegSet::of(x10, x11), sp); ++ pop_reg(0xc00, sp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { @@ -16414,7 +16896,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, constant); ++ addi(mdp_in, mdp_in, (unsigned)constant); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + @@ -17012,7 +17494,7 @@ index 000000000..91deb0ae2 + + ld(t0, mdo_addr); + beqz(t0, none); -+ mv(tmp, (u1)TypeEntries::null_seen); ++ li(tmp, (u1)TypeEntries::null_seen); + beq(t0, tmp, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -17047,10 +17529,10 @@ index 000000000..91deb0ae2 + + lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + if (is_virtual) { -+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); + bne(t0, tmp, profile_continue); + } else { -+ mv(tmp, (u1)DataLayout::call_type_data_tag); ++ li(tmp, (u1)DataLayout::call_type_data_tag); + bne(t0, tmp, profile_continue); + } + @@ -17080,7 +17562,7 @@ index 000000000..91deb0ae2 + mv(index, zr); // index < TypeProfileArgsLimit + bind(loop); + bgtz(index, profileReturnType); -+ mv(t0, (int)MethodData::profile_return()); ++ li(t0, (int)MethodData::profile_return()); + beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false + bind(profileReturnType); + // If return value type is profiled we may have no argument to profile @@ -17088,7 +17570,7 @@ index 000000000..91deb0ae2 + mv(t1, - TypeStackSlotEntries::per_arg_count()); + mul(t1, index, t1); + add(tmp, tmp, t1); -+ mv(t1, TypeStackSlotEntries::per_arg_count()); ++ li(t1, TypeStackSlotEntries::per_arg_count()); + add(t0, mdp, off_to_args); + blt(tmp, t1, done); + @@ -17099,8 +17581,8 @@ index 000000000..91deb0ae2 + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list -+ mv(t0, stack_slot_offset0); -+ mv(t1, slot_step); ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(t0, mdp, t0); @@ -17110,8 +17592,8 @@ index 000000000..91deb0ae2 + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + -+ mv(t0, argument_type_offset0); -+ mv(t1, type_step); ++ li(t0, argument_type_offset0); ++ li(t1, type_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(mdo_addr, mdp, t0); @@ -17123,7 +17605,7 @@ index 000000000..91deb0ae2 + + // increment index by 1 + addi(index, index, 1); -+ mv(t1, TypeProfileArgsLimit); ++ li(t1, TypeProfileArgsLimit); + blt(index, t1, loop); + bind(loopEnd); + @@ -17178,13 +17660,13 @@ index 000000000..91deb0ae2 + // length + Label do_profile; + lbu(t0, Address(xbcp, 0)); -+ mv(tmp, (u1)Bytecodes::_invokedynamic); ++ li(tmp, (u1)Bytecodes::_invokedynamic); + beq(t0, tmp, do_profile); -+ mv(tmp, (u1)Bytecodes::_invokehandle); ++ li(tmp, (u1)Bytecodes::_invokehandle); + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ mv(t1, 
vmIntrinsics::_compiledLambdaForm); ++ li(t1, vmIntrinsics::_compiledLambdaForm); + bne(t0, t1, profile_continue); + bind(do_profile); + } @@ -17227,7 +17709,6 @@ index 000000000..91deb0ae2 + add(t0, mdp, off_base); + add(t1, mdp, type_base); + -+ + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); + // load offset on the stack from the slot for this parameter + ld(tmp2, Address(tmp2, 0)); @@ -17287,12 +17768,12 @@ index 000000000..91deb0ae2 +#endif diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 000000000..042ee8280 +index 0000000000..4126e8ee70 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -0,0 +1,283 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17439,7 +17920,7 @@ index 000000000..042ee8280 + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + -+// Load float value from 'address'. The value is loaded onto the FPU register v0. ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. + void load_float(Address src); + void load_double(Address src); + @@ -17576,14 +18057,14 @@ index 000000000..042ee8280 +#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp new file mode 100644 -index 000000000..777f326e3 +index 0000000000..776b078723 --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,296 @@ +@@ -0,0 +1,295 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17637,8 +18118,9 @@ index 000000000..777f326e3 +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; + } -+ return fnoreg; +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { @@ -17760,7 +18242,6 @@ index 000000000..777f326e3 + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; + -+ + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; @@ -17793,7 +18274,6 @@ index 000000000..777f326e3 + *_to++ = value; + } + -+ + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17801,7 +18281,6 @@ index 000000000..777f326e3 + } + } + -+ + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17826,7 +18305,7 @@ index 000000000..777f326e3 + } + } + -+ virtual void pass_double() { ++ virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { @@ -17844,12 +18323,13 @@ index 000000000..777f326e3 + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; ++ _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_reg_int_args = (method->is_static() ? 1 : 0); + _num_reg_fp_args = 0; + } ++ + ~SlowSignatureHandler() + { + _from = NULL; @@ -17871,19 +18351,19 @@ index 000000000..777f326e3 + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate((uint64_t)UCONST64(-1)); ++ ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 000000000..06342869f +index 0000000000..05df63ba2a --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17952,14 +18432,13 @@ index 000000000..06342869f +#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 000000000..a169b8c5f +index 0000000000..5a0c9b812f --- /dev/null +++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -0,0 +1,89 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17988,7 +18467,7 @@ index 000000000..a169b8c5f +private: + + // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore @@ -18038,23 +18517,24 @@ index 000000000..a169b8c5f + +public: + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } + +#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 000000000..9bab8e78f +index 0000000000..f6e7351c4f --- /dev/null +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,193 @@ +@@ -0,0 +1,194 @@ +/* -+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18084,6 +18564,7 @@ index 000000000..9bab8e78f +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> @@ -18137,10 +18618,10 @@ index 000000000..9bab8e78f + __ bnez(t0, slow); + __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. -+ ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); @@ -18150,6 +18631,7 @@ index 000000000..9bab8e78f + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + __ add(roffset, robj, roffset); ++ + switch (type) { + case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; + case T_BYTE: __ lb(result, Address(roffset, 0)); break; @@ -18170,7 +18652,6 @@ index 000000000..9bab8e78f + default: ShouldNotReachHere(); + } + -+ // counter_addr is address dependent on result. 
+ __ xorr(rcounter_addr, rcounter_addr, result); + __ xorr(rcounter_addr, rcounter_addr, result); + __ lw(t0, safepoint_counter_addr); @@ -18246,14 +18727,13 @@ index 000000000..9bab8e78f +} diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 000000000..96775e0db +index 0000000000..df3c0267ee --- /dev/null +++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,108 @@ +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18280,7 +18760,6 @@ index 000000000..96775e0db +#define CPU_RISCV_JNITYPES_RISCV_HPP + +#include "jni.h" -+#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni @@ -18319,9 +18798,9 @@ index 000000000..96775e0db + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } @@ -18360,14 +18839,14 @@ index 000000000..96775e0db +#endif // CPU_RISCV_JNITYPES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 000000000..5d6078bb3 +index 0000000000..e18bd3d8e2 --- /dev/null +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,5861 @@ +@@ -0,0 +1,5410 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -18398,26 +18877,26 @@ index 000000000..5d6078bb3 +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#endif +#ifdef COMPILER2 -+#include "oops/oop.hpp" +#include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" +#endif + +#ifdef PRODUCT @@ -18429,30 +18908,35 @@ index 000000000..5d6078bb3 + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg3, arg); + } +} + -+void MacroAssembler::align(int modulus) { -+ while (offset() % modulus != 0) { nop(); } ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { @@ -18553,6 +19037,22 @@ index 000000000..5d6078bb3 +void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} +void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ +// Calls to C land +// +// When entering C land, the fp, & esp of the last Java frame have to be recorded @@ -18604,11 +19104,36 @@ index 000000000..5d6078bb3 + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); + } else { ++ InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. 
We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); @@ -18693,7 +19218,6 @@ index 000000000..5d6078bb3 + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + -+ +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + @@ -18710,11 +19234,10 @@ index 000000000..5d6078bb3 + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18749,11 +19272,11 @@ index 000000000..5d6078bb3 + } else { + ld(x10, addr); + } -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. 
++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18803,51 +19326,47 @@ index 000000000..5d6078bb3 +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016" PRIX64, pc); ++ tty->print_cr(" pc = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif -+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); -+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); -+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); -+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); -+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); -+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); -+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); -+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); -+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); -+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); -+ tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); -+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); -+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); -+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); -+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); -+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); -+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); -+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); -+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); -+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); -+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); -+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); -+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); -+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); -+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); -+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); -+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); -+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); -+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); -+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); -+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); + BREAKPOINT; + } -+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -+ } else { -+ ttyLocker ttyl; -+ ::tty->print_cr("=============== DEBUG 
MESSAGE: %s ================\n", msg); -+ assert(false, "DEBUG MESSAGE: %s", msg); + } ++ fatal("DEBUG MESSAGE: %s", msg); +} + +void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { @@ -18873,13 +19392,12 @@ index 000000000..5d6078bb3 + +void MacroAssembler::stop(const char* msg) { + address ip = pc(); -+ push_reg(RegSet::range(x0, x31), sp); -+ if(msg != NULL && ip != NULL) { -+ mv(c_rarg0, (uintptr_t)(address)msg); -+ mv(c_rarg1, (uintptr_t)(address)ip); -+ } else { -+ ShouldNotReachHere(); -+ } ++ pusha(); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); @@ -19079,29 +19597,23 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::la(Register Rd, const Address &adr) { -+ code_section()->relocate(pc(), adr.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); + relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ switch(adr.getMode()) { ++ switch (adr.getMode()) { + case Address::literal: { + if (rtype == relocInfo::none) { -+ mv(Rd, (intptr_t)(adr.target())); ++ li(Rd, (intptr_t)(adr.target())); + } else { + movptr(Rd, adr.target()); + } + break; + } -+ case Address::base_plus_offset:{ -+ Register base = adr.base(); -+ int64_t offset = adr.offset(); -+ if (offset == 0 && Rd != base) { -+ mv(Rd, base); -+ } else if (offset != 0 && Rd != base) { -+ add(Rd, base, offset, Rd); -+ } else if (offset != 0 && Rd == base) { -+ Register tmp = (Rd == t0) ? t1 : t0; -+ add(base, base, offset, tmp); -+ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); + break; + } + default: @@ -19144,26 +19656,31 @@ index 000000000..5d6078bb3 + + INSN(beq, feq, bnez); + INSN(bne, feq, beqz); ++ +#undef INSN + + +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ @@ -19271,110 +19788,6 @@ index 000000000..5d6078bb3 + +#undef INSN + -+#ifdef COMPILER2 -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ 
(conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, -+ -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; -+ -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); -+} -+ -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+#endif -+ +void MacroAssembler::push_reg(Register Rs) +{ + addi(esp, esp, 0 - wordSize); @@ -19390,7 +19803,7 @@ index 000000000..5d6078bb3 +int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { + int count = 0; + // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg --) { ++ for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } @@ -19403,6 +19816,7 @@ index 000000000..5d6078bb3 +// Return the number of words pushed +int MacroAssembler::push_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19424,6 +19838,7 @@ index 000000000..5d6078bb3 + +int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19443,13 +19858,52 @@ index 000000000..5d6078bb3 + return count; +} + -+RegSet MacroAssembler::call_clobbered_registers() { -+ // Push integer registers x7, x10-x17, x28-x31. -+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. 
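The float push/pop that follow reuse the same bitset walk as push_reg()/pop_reg() above: the mask is scanned from the most-significant bit down, so the collected register numbers come out in descending order. A standalone C++ sketch of that scan (illustration only, not the HotSpot code; the helper name is made up):

#include <cstdint>

// Collect the register numbers selected by a 32-bit mask, scanning from
// bit 31 down to bit 0, mirroring the loop in bitset_to_regs() above.
static int collect_regs(uint32_t bitset, unsigned char regs[32]) {
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if (bitset & (1u << 31)) {   // test the bit currently sitting at position 31
      regs[count++] = (unsigned char)reg;
    }
    bitset <<= 1;                // move the next lower bit up into position 31
  }
  return count;                  // number of registers the caller will push/pop
}

// e.g. collect_regs(0x80000400, regs) yields regs = {31, 10}, count = 2.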
++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); ++ ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; ++ } ++ ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; ++} ++ ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; ++ } ++ ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } ++ ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; +} + -+void MacroAssembler::push_call_clobbered_registers() { -+ push_reg(call_clobbered_registers(), sp); ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + + // Push float registers f0-f7, f10-f17, f28-f31. + addi(sp, sp, - wordSize * 20); @@ -19461,7 +19915,8 @@ index 000000000..5d6078bb3 + } +} + -+void MacroAssembler::pop_call_clobbered_registers() { ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); + int offset = 0; + for (int i = 0; i < 32; i++) { + if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -19470,39 +19925,35 @@ index 000000000..5d6078bb3 + } + addi(sp, sp, wordSize * 20); + -+ pop_reg(call_clobbered_registers(), sp); ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} ++ ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} ++ ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
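The literal masks used by pusha()/popa() and push_CPU_state()/pop_CPU_state() below can be read as "all 32 integer registers minus the excluded ones". A small self-contained check of that arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t all_regs = 0xffffffffu;
  // pusha()/popa(): drop zr(x0), sp(x2), gp(x3), tp(x4); keep everything else.
  const uint32_t pusha_mask = all_regs & ~((1u << 0) | (1u << 2) | (1u << 3) | (1u << 4));
  // push_CPU_state()/pop_CPU_state(): additionally drop ra(x1).
  const uint32_t cpu_state_mask = pusha_mask & ~(1u << 1);

  assert(pusha_mask == 0xffffffe2u);
  assert(cpu_state_mask == 0xffffffe0u);
  return 0;
}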
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); +} + -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++void MacroAssembler::push_CPU_state() { ++ CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(RegSet::range(x5, x31), sp); ++ push_reg(0xffffffe0, sp); + + // float registers + addi(sp, sp, - 32 * wordSize); + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } -+ -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } +} + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } -+ } ++void MacroAssembler::pop_CPU_state() { ++ CompressibleRegion cr(this); + + // float registers + for (int i = 0; i < 32; i++) { @@ -19511,7 +19962,7 @@ index 000000000..5d6078bb3 + addi(sp, sp, 32 * wordSize); + + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(RegSet::range(x5, x31), sp); ++ pop_reg(0xffffffe0, sp); +} + +static int patch_offset_in_jal(address branch, int64_t offset) { @@ -19661,10 +20112,14 @@ index 000000000..5d6078bb3 + int64_t imm = (intptr_t)target; + return patch_imm_in_li32(branch, (int32_t)imm); + } else { -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif + ShouldNotReachHere(); ++ return -1; + } -+ return -1; +} + +address MacroAssembler::target_addr_for_insn(address insn_addr) { @@ -19721,6 +20176,13 @@ index 000000000..5d6078bb3 + code_section()->relocate(pc(), dest.rspec()); + movptr(Rd, dest.target()); +} ++ ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ +void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); @@ -19795,22 +20257,6 @@ index 000000000..5d6078bb3 + } +} + -+// rotate right with imm bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseZbb) { -+ rori(dst, src, shift); -+ return; -+ } -+ -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} -+ +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { @@ -19894,6 +20340,7 @@ index 000000000..5d6078bb3 + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} ++ +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] 
Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { @@ -19954,12 +20401,28 @@ index 000000000..5d6078bb3 + orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } ++ ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} ++ +void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { + if (is_imm_in_range(imm, 12, 0)) { + and_imm12(Rd, Rn, imm); + } else { + assert_different_registers(Rn, tmp); -+ mv(tmp, imm); ++ li(tmp, imm); + andr(Rd, Rn, tmp); + } +} @@ -19969,11 +20432,11 @@ index 000000000..5d6078bb3 + if (src.is_register()) { + orr(tmp1, tmp1, src.as_register()); + } else { -+ if(is_imm_in_range(src.as_constant(), 12, 0)) { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { + ori(tmp1, tmp1, src.as_constant()); + } else { + assert_different_registers(tmp1, tmp2); -+ mv(tmp2, src.as_constant()); ++ li(tmp2, src.as_constant()); + orr(tmp1, tmp1, tmp2); + } + } @@ -19996,7 +20459,7 @@ index 000000000..5d6078bb3 +} + +// Move an oop into a register. immediate is true if we want -+// immediate instrcutions, i.e. we are not going to patch this ++// immediate instructions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { @@ -20062,6 +20525,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); + int32_t offset = 0; + _masm = masm; + _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); @@ -20070,6 +20534,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); + _masm->bind(_label); + _masm = NULL; +} @@ -20116,14 +20581,14 @@ index 000000000..5d6078bb3 + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, -+ Register tmp1, Register tmp2, Register tmp3) { ++ Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + @@ -20200,7 +20665,8 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); ++ li(xbase, (uintptr_t)Universe::narrow_klass_base()); ++ + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); @@ -20208,8 +20674,8 @@ index 000000000..5d6078bb3 + } else { + add(dst, xbase, src); + } -+ if (xbase == xheapbase) { reinit_heapbase(); } + ++ if (xbase 
== xheapbase) { reinit_heapbase(); } +} + +void MacroAssembler::encode_klass_not_null(Register r) { @@ -20241,7 +20707,7 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (intptr_t)Universe::narrow_klass_base()); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); + sub(dst, src, xbase); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); @@ -20290,8 +20756,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register tmp2, Register tmp3, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, @@ -20306,7 +20772,7 @@ index 000000000..5d6078bb3 + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, @@ -20394,7 +20860,7 @@ index 000000000..5d6078bb3 + if (itable_index.is_register()) { + slli(t0, itable_index.as_register(), 3); + } else { -+ mv(t0, itable_index.as_constant() << 3); ++ li(t0, itable_index.as_constant() << 3); + } + add(recv_klass, recv_klass, t0); + if (itentry_off) { @@ -20439,17 +20905,11 @@ index 000000000..5d6078bb3 + ld(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ Address addr = form_address(recv_klass, /* base */ -+ vtable_offset_in_bytes, /* offset */ -+ 12, /* expect offset bits */ -+ method_result); /* temp reg */ -+ ld(method_result, addr); ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } +} + +void MacroAssembler::membar(uint32_t order_constraint) { -+ if (!os::is_MP()) { return; } -+ + address prev = pc() - NativeMembar::instruction_size; + address last = code()->last_insn(); + @@ -20470,6 +20930,21 @@ index 000000000..5d6078bb3 + } +} + ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. ++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); ++ } ++ ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} ++ +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register tmp_reg, @@ -20480,21 +20955,6 @@ index 000000000..5d6078bb3 + bind(L_failure); +} + -+// Write serialization page so VM thread can do a pseudo remote membar. -+// We use the current thread pointer to calculate a thread specific -+// offset to write to within the page. This minimizes bus traffic -+// due to cache line collision. 
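The form_address() helper added above only folds the displacement into the addressing mode when it fits RISC-V's signed 12-bit immediate; otherwise the offset is first materialized into the scratch register. A minimal sketch of that range test (the architectural limit, not the exact HotSpot helper):

#include <cstdint>

// RISC-V loads/stores encode a signed 12-bit byte displacement: [-2048, 2047].
static bool fits_in_simm12(int64_t byte_offset) {
  return byte_offset >= -2048 && byte_offset <= 2047;
}

// fits_in_simm12(2047) -> true : Address(base, offset) can be used directly
// fits_in_simm12(4096) -> false: the offset is moved into a register, added
//                                to the base, and Address(tmp) is used instead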
-+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { -+ srli(tmp2, thread, os::get_serialize_page_shift_count()); -+ -+ int mask = os::vm_page_size() - sizeof(int); -+ andi(tmp2, tmp2, mask, tmp1); -+ -+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); -+ membar(MacroAssembler::AnyAny); -+ sw(zr, Address(tmp1)); -+} -+ +void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(t1, Address(xthread, Thread::polling_page_offset())); @@ -20509,30 +20969,6 @@ index 000000000..5d6078bb3 + } +} + -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. -+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} -+ +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value @@ -20540,17 +20976,16 @@ index 000000000..5d6078bb3 + // addr identifies memory word to compare against/update + Label retry_load, nope; + bind(retry_load); -+ // flush and load exclusive from the memory location -+ // and fail if it is not what we expect ++ // Load reserved from the memory location + lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect + bne(tmp, oldv, nope); -+ // if we store+flush with no intervening write tmp wil be zero ++ // If the store conditional succeeds, tmp will be zero + sc_d(tmp, newv, addr, Assembler::rl); + beqz(tmp, succeed); -+ // retry so we only ever return after a load fails to compare -+ // ensures we don't return a stale value after a failed write. 
++ // Retry only when the store conditional failed + j(retry_load); -+ // if the memory word differs we return it in oldv and signal a fail ++ + bind(nope); + membar(AnyAny); + mv(oldv, tmp); @@ -20616,9 +21051,10 @@ index 000000000..5d6078bb3 + andi(aligned_addr, addr, ~3); + + if (size == int8) { -+ mv(mask, 0xff); ++ addi(mask, zr, 0xff); + } else { -+ mv(mask, -1); ++ // size == int16 case ++ addi(mask, zr, -1); + zero_extend(mask, mask, 16); + } + sll(mask, mask, shift); @@ -20658,7 +21094,7 @@ index 000000000..5d6078bb3 + bnez(tmp, retry); + + if (result_as_bool) { -+ mv(result, 1); ++ addi(result, zr, 1); + j(done); + + bind(fail); @@ -20670,16 +21106,16 @@ index 000000000..5d6078bb3 + + bind(fail); + srl(result, tmp, shift); -+ } + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else if (size == int16) { -+ sign_extend(result, result, 16); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } + } +} + -+// weak cmpxchg narrow value will kill t0, t1, expected, new_val and tmps. +// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement +// the weak CAS stuff. The major difference is that it just failed when store conditional +// failed. @@ -20693,7 +21129,7 @@ index 000000000..5d6078bb3 + assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); + cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ Label fail, done; ++ Label succ, fail, done; + + lr_w(old, aligned_addr, acquire); + andr(tmp, old, mask); @@ -20702,14 +21138,13 @@ index 000000000..5d6078bb3 + andr(tmp, old, not_mask); + orr(tmp, tmp, new_val); + sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, fail); ++ beqz(tmp, succ); + -+ // Success -+ mv(result, 1); ++ bind(fail); ++ addi(result, zr, 1); + j(done); + -+ // Fail -+ bind(fail); ++ bind(succ); + mv(result, zr); + + bind(done); @@ -20731,7 +21166,7 @@ index 000000000..5d6078bb3 + + // equal, succeed + if (result_as_bool) { -+ mv(result, 1); ++ li(result, 1); + } else { + mv(result, expected); + } @@ -20753,22 +21188,20 @@ index 000000000..5d6078bb3 + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { -+ assert(size != int8 && size != int16, "unsupported operand size"); -+ -+ Label fail, done; ++ Label fail, done, sc_done; + load_reserved(addr, size, acquire); + bne(t0, expected, fail); + store_conditional(addr, new_val, size, release); -+ bnez(t0, fail); -+ -+ // Success -+ mv(result, 1); -+ j(done); ++ beqz(t0, sc_done); + -+ // Fail ++ // fail + bind(fail); -+ mv(result, zr); ++ li(result, 1); ++ j(done); + ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); + bind(done); +} + @@ -20817,229 +21250,7 @@ index 000000000..5d6078bb3 + +#undef ATOMIC_XCHGU + -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. 
-+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) { -+ counters = BiasedLocking::counters(); -+ } -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); -+ bnez(t0, cas_label); // don't care flag unless jumping to done -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. 
Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ // don't care flag unless jumping to done -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. 
-+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::far_jump(Address entry, Register tmp) { ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21048,13 +21259,15 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. + la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x0, tmp, offset); + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + j(entry); + } +} + -+void MacroAssembler::far_call(Address entry, Register tmp) { ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21063,8 +21276,10 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. 
+ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x1, tmp, offset); // link + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jal(entry); // link + } +} @@ -21079,7 +21294,7 @@ index 000000000..5d6078bb3 + assert_different_registers(sub_klass, super_klass, tmp_reg); + bool must_load_sco = (super_check_offset == noreg); + if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { + assert_different_registers(sub_klass, super_klass, super_check_offset); + } @@ -21160,15 +21375,15 @@ index 000000000..5d6078bb3 + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); + if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } -+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg) ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + + Label L_fallthrough; + int label_nulls = 0; @@ -21177,7 +21392,7 @@ index 000000000..5d6078bb3 + + assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of useful fields in sub_klass: ++ // A couple of usefule fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); @@ -21228,14 +21443,14 @@ index 000000000..5d6078bb3 + add(x15, x15, Array::base_offset_in_bytes()); + + // Set t0 to an obvious invalid value, falling through by default -+ mv(t0, -1); ++ li(t0, -1); + // Scan X12 words at [X15] for an occurrence of X10. + repne_scan(x15, x10, x12, t0); + + // pop will restore x10, so we should use a temp register to keep its value + mv(t1, x10); + -+ // Unspill the temp. registers: ++ // Unspill the temp registers: + pop_reg(pushed_registers, sp); + + bne(t1, t0, *L_failure); @@ -21268,26 +21483,28 @@ index 000000000..5d6078bb3 +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, -+ Register tmp1, ++ Register tmp, + Label& slow_case, + bool is_far) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); +} + + +// get_thread() can be called anywhere inside generated code so we +// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code ++// by the call to Thread::current() or, indeed, the call setup code. 
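The slow-path subtype check above boils down to a linear scan of the secondary-supers array (the repne_scan loop over x12 words at [x15]); in plain C++ the search looks roughly like this (hypothetical helper, not HotSpot code):

#include <cstddef>

// Walk 'len' pointer-sized slots and report whether 'super' occurs.
// On a hit the real code also stores the super into the
// secondary_super_cache so the next fast-path check for the same
// pair succeeds immediately.
static bool secondary_supers_contain(void* const* supers, size_t len, const void* super) {
  for (size_t i = 0; i < len; i++) {
    if (supers[i] == super) {
      return true;
    }
  }
  return false;
}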
+void MacroAssembler::get_thread(Register thread) { + // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::of(x10) + ra - thread; ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; + push_reg(saved_regs, sp); + -+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); -+ jalr(ra); -+ if (thread != c_rarg0) { -+ mv(thread, c_rarg0); ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); + } + + // restore pushed registers @@ -21295,8 +21512,9 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::load_byte_map_base(Register reg) { -+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ mv(reg, (uint64_t)byte_map_base); ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { @@ -21310,11 +21528,12 @@ index 000000000..5d6078bb3 + assert(is_valid_riscv64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+ code_section()->relocate(pc(), dest.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); + // RISC-V doesn't compute a page-aligned address, in order to partially + // compensate for the use of *signed* offsets in its base+disp12 + // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2K)). ++ // [-(2G + 2K), 2G - 2k). + if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { + int64_t distance = dest.target() - pc(); + auipc(reg1, (int32_t)distance + 0x800); @@ -21325,7 +21544,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::build_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + sub(sp, sp, framesize); + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); @@ -21333,7 +21553,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + ld(fp, Address(sp, framesize - 2 * wordSize)); + ld(ra, Address(sp, framesize - wordSize)); + add(sp, sp, framesize); @@ -21364,6 +21585,222 @@ index 000000000..5d6078bb3 + bind(no_reserved_zone_enabling); +} + ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool 
swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. 
++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ +// Move the address of the polling page into dest. +void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { @@ -21375,7 +21812,8 @@ index 000000000..5d6078bb3 + } +} + -+// Move the address of the polling page into dest. ++// Read the polling page. The address of the polling page must ++// already be in r. +void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { + int32_t offset = 0; + get_polling_page(dest, page, offset, rtype); @@ -21384,9 +21822,9 @@ index 000000000..5d6078bb3 + +// Read the polling page. The address of the polling page must +// already be in r. -+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { + code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(r, offset)); ++ lwu(zr, Address(dest, offset)); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { @@ -21400,8 +21838,9 @@ index 000000000..5d6078bb3 + } +#endif + int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(pc(), rspec); ++ code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} @@ -21412,8 +21851,9 @@ index 000000000..5d6078bb3 + int index = oop_recorder()->find_index(k); + assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + ++ InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(pc(), rspec); ++ code_section()->relocate(inst_mark(), rspec); + narrowKlass nk = Klass::encode_klass(k); + li32(dst, nk); + zero_extend(dst, dst, 32); @@ -21421,7 +21861,7 @@ index 000000000..5d6078bb3 + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. 
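trampoline_call() below emits an extra stub when the target may lie outside the direct reach of a jal, while the far path goes through auipc + jalr. A sketch of the reachability question using the ISA limits (approximate, not HotSpot's exact far_branches() predicate):

#include <cstdint>

// jal carries a signed 21-bit, 2-byte-aligned offset: roughly +/-1 MiB.
// The auipc + jalr pair used on the far/trampoline path reaches about
// +/-2 GiB, which is why it is safe whenever the whole code cache fits
// in that range.
static bool reachable_by_jal(int64_t from, int64_t to) {
  const int64_t distance = to - from;          // must also be even in practice
  return distance >= -(int64_t(1) << 20) && distance < (int64_t(1) << 20);
}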
-+address MacroAssembler::trampoline_call(Address entry) { ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type || + entry.rspec().type() == relocInfo::opt_virtual_call_type || @@ -21442,22 +21882,22 @@ index 000000000..5d6078bb3 + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { -+ postcond(pc() == badAddress); ++ postcond(pc() == badAddress); + return NULL; // CodeCache is full + } + } + } + -+ address call_pc = pc(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + relocate(entry.rspec()); + if (!far_branches()) { + jal(entry.target()); + } else { + jal(pc()); + } -+ ++ // just need to return a non-null address + postcond(pc() != badAddress); -+ return call_pc; ++ return pc(); +} + +address MacroAssembler::ic_call(address entry, jint method_index) { @@ -21480,8 +21920,8 @@ index 000000000..5d6078bb3 + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { -+ // Max stub size: alignment nop, TrampolineStub. -+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } @@ -21492,7 +21932,8 @@ index 000000000..5d6078bb3 + + // make sure 4 byte aligned here, so that the destination address would be + // 8 byte aligned after 3 intructions -+ while (offset() % wordSize == 0) { nop(); } ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); @@ -21507,6 +21948,7 @@ index 000000000..5d6078bb3 + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); + emit_int64((intptr_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); @@ -21522,54 +21964,26 @@ index 000000000..5d6078bb3 + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below. 
-+ return form_address(dst.base(), dst.offset(), 12, t1); ++ return form_address(t1, dst.base(), dst.offset()); + default: + la(t1, dst); + return Address(t1); + } +} + -+void MacroAssembler::increment(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ ld(t0, adr); -+ add(t0, t0, value, t1); -+ sd(t0, adr); -+} -+ -+void MacroAssembler::incrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ lwu(t0, adr); -+ addw(t0, t0, value, t1); -+ sw(t0, adr); -+} -+ -+void MacroAssembler::decrement(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ assert_different_registers(adr.base(), t0); + ld(t0, adr); -+ sub(t0, t0, value, t1); ++ addi(t0, t0, imm); + sd(t0, adr); +} + -+void MacroAssembler::decrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ assert_different_registers(adr.base(), t0); + lwu(t0, adr); -+ subw(t0, t0, value, t1); ++ addiw(t0, t0, imm); + sw(t0, adr); +} + @@ -21581,2693 +21995,2308 @@ index 000000000..5d6078bb3 + beq(src1, t0, equal); +} + -+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_equals(this, obj1, obj2, equal, is_far); ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); +} + -+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_nequals(this, obj1, obj2, nequal, is_far); ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + +#ifdef COMPILER2 -+// Set dst NaN if either source is NaN. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ Label Ldone; -+ fsflags(zr); -+ if (is_double) { -+ if (is_min) { -+ fmin_d(dst, src1, src2); -+ } else { -+ fmax_d(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_d(zr, src1, src2); -+ } else { -+ if (is_min) { -+ fmin_s(dst, src1, src2); -+ } else { -+ fmax_s(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_s(zr, src1, src2); -+ } -+ frflags(t0); -+ beqz(t0, Ldone); ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); ++ ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); ++ ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+ // Src1 or src2 must be NaN here. Set dst NaN. 
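(As a plain-C++ reference for the SWAR trick that compute_match_mask()/compute_index() above implement, the sketch below models the Latin-1 case. It is illustrative only: the multiply by 0x0101010101010101 replicating the pattern byte stands in for the shift/or sequence the assembly uses, and, as the comment above notes, only the lowest 0x80 in the resulting mask is a valid match index because borrow propagation can set spurious high bits in higher bytes.)

#include <cstdint>
#include <cstdio>
#include <cstring>

// Index (0..7) of the first byte of 'word' equal to 'ch', or -1 if none.
// Assumes a little-endian host (as RISC-V is); uses the GCC/Clang ctz builtin.
int first_match_latin1(uint64_t word, uint8_t ch) {
  const uint64_t mask1 = 0x0101010101010101ULL;    // same masks as string_indexof_char
  const uint64_t mask2 = 0x7f7f7f7f7f7f7f7fULL;
  const uint64_t x = word ^ (mask1 * ch);          // xorr(src, pattern, src)
  const uint64_t m = (x - mask1) & ~(x | mask2);   // sub / orr / notr / andr
  if (m == 0) {
    return -1;
  }
  return __builtin_ctzll(m) >> 3;                  // trailing zeros -> byte index
}

int main() {
  // Bytes "riscv64\0" packed little-endian; 'v' sits at index 4.
  const char s[8] = {'r', 'i', 's', 'c', 'v', '6', '4', '\0'};
  uint64_t w;
  std::memcpy(&w, s, sizeof(w));
  std::printf("%d\n", first_match_latin1(w, 'v'));   // prints 4
  return 0;
}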
-+ if (is_double) { -+ fadd_d(dst, src1, src2); -+ } else { -+ fadd_s(dst, src1, src2); -+ } -+ bind(Ldone); ++ bind(L_end); +} + -+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ mv(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ oop_equal(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); ++} + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); ++} + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); ++} + -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} + -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+ BLOCK_COMMENT("} array_equals"); -+ postcond(pc() != badAddress); -+ return pc(); ++ bind(L_first_loop_exit); +} + -+// Compare Strings ++/** ++ * Multiply 64 bit by 64 bit first loop. ++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+ BLOCK_COMMENT("string_equals {"); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ beqz(cnt1, SAME); -+ mv(result, false); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ blez(cnt1, SHORT); ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+ // Main 8 byte comparison loop. 
-+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+ if (!AvoidUnalignedAccesses) { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ } ++ j(L_first_loop); + -+ bind(SHORT); -+ ld(tmp1, Address(a1)); -+ ld(tmp2, Address(a2)); -+ xorr(tmp1, tmp1, tmp2); -+ neg(cnt1, cnt1); -+ slli(cnt1, cnt1, LogBitsPerByte); -+ sll(tmp1, tmp1, cnt1); -+ bnez(tmp1, DONE); ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); + -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); ++ bind(L_first_loop_exit); +} + -+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) +{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; ++ srliw(jdx, idx, 2); + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ bind(L_third_loop); + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? 
(load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ BLOCK_COMMENT("string_compare {"); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // A very short string -+ mv(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // check if str1 and str2 are same string -+ beq(str1, str2, DONE); -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if(!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. 
Load bytes from byte-aligned address, -+ // which may contain invalid bytes when remaining bytes is -+ // less than 4(UL/LU) or 8 (LL/UU). -+ // Invalid bytes should be removed before comparison. -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ } else if (isLU) { // LU -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes -+ } -+ // remove invalid bytes -+ slli(t0, cnt2, LogBitsPerByte); -+ sll(tmp1, tmp1, t0); -+ sll(tmp2, tmp2, t0); -+ } else { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); ++ j(L_third_loop); + -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++ bind(L_third_loop_exit); + -+ bind(DONE); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ BLOCK_COMMENT("} string_compare"); -+} ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ mv(result, -1); -+ mv(index, zr); ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++ bind(L_check_1); + -+ bind(LOOP4); -+ isL ? 
lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++ bind(L_post_third_loop_done); ++} + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+ bind(MATCH7); -+ addi(index, index, 7); ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++ Label L_multiply_64_x_64_loop, L_done; + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+ addi(t0, cnt1, isL ? 
-32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++ const Register jdx = tmp1; + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++ // Check if x and y are both 8-byte aligned. ++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ blez(jdx, L_third_loop_exit); ++ ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); ++ j(L_second_loop_unaligned); + } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); -+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ bind(NOMATCH); -+ mv(result, -1); ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_indexof {"); -+ -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; -+ -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. -+ -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. -+ -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); -+ -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. 
-+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } -+ -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 
7 : 3; ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+ sub(sp, sp, ASIZE); ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++ bind(L_done); ++} ++#endif + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 
8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); ++} + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. ++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; ++ } + } else { -+ mv(result_tmp, 1); ++ jal(zero_blocks); + } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); + } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); ++ } ++ bind(l); ++ } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); ++} + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++#define SmallArraySize (18 * BytesPerLong) + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. 
++// cnt: Immediate count in HeapWords. ++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); ++ BLOCK_COMMENT("zero_words {"); + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); ++ BLOCK_COMMENT("} zero_words"); +} + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... 
++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ assert_different_registers(base, cnt, value, t0, t1); + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ beqz(cnt, fini); + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. ++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); ++ } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); + -+ Register first = tmp3; ++ bind(fini); ++} + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++#undef FCVT_SAFE + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
*/ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ ++} + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++FCMP(float, s); ++FCMP(double, d); + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++#undef FCMP + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); + } ++#endif // ASSERT + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ Label loop; ++ Label entry; + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... 
++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); ++} + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); ++ } + -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; ++ } + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; + } + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } ++} + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; + } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); 
-+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); + } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } +} + -+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts -+ } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; ++ } ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; ++ } + -+ mv(result, true); ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+ mv(result, false); ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); -+ } ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ mv(result, -1); ++ mv(index, zr); + -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. -+void MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? 
lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); -+} ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ BLOCK_COMMENT("arrays_equals_v {"); ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ // if (a1 == a2), return true -+ mv(result, true); -+ oop_equal(a1, a2, DONE); ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+ bind(DONE); ++ bind(MATCH7); ++ addi(index, index, 7); + -+ BLOCK_COMMENT("} arrays_equals_v"); ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+ bool str1_isL = 
encLL || encLU; -+ bool str2_isL = encLL || encUL; ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); + -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); + -+ BLOCK_COMMENT("string_compare {"); ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } + -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); -+ } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+ bind(DONE); -+} ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } + -+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ bind(NOMATCH); ++ mv(result, -1); + -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+// result: the number of elements had been encoded. 
-+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); -+ -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ bind(DIFFERENCE); -+ add(result, result, tmp); ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { -+ Label loop, DONE; ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ mv(result, true); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+ bind(loop); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ // if element highest bit is set, return true -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, DONE); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, loop); -+ mv(result, false); ++ BLOCK_COMMENT("string_indexof {"); + -+ bind(DONE); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zero, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zero); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zero, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); -+} ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns would be like: -+// - 0x8080808080808080 (Latin1) -+// - 0x8000800080008000 (UTF16) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); -+} ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); -+} ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. 
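++  //
++  // Illustration only (not part of the generated code): with the Latin1
++  // pattern "NEEDLE" (m = 6), the preprocessing loop below yields
++  // bc['L'] = 1, bc['D'] = 2, bc['E'] = 3, bc['N'] = 5 and bc[c] = 6 for
++  // every character that does not occur in the pattern, so when the source
++  // character aligned with the last pattern position is, say, 'X', the
++  // pattern can be shifted forward by bc['X'] = 6 positions in one step
++  // instead of one position at a time.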
++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); -+} ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2) { -+ Label L_loop_1, L_loop_2, L_end, L_not_zero; -+ bnez(len, L_not_zero); -+ mv(out, zr); -+ j(L_end); -+ bind(L_not_zero); -+ zero_extend(k, k, 32); -+ shadd(offset, offset, out, t0, LogBytesPerInt); -+ shadd(in, len, in, t0, LogBytesPerInt); -+ mv(out, zr); ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+ if (AvoidUnalignedAccesses) { -+ // if in and offset are both 8 bytes aligned. 
-+ orr(t0, in, offset); -+ andi(t0, t0, 0x7); -+ beqz(t0, L_loop_2); -+ } else { -+ j(L_loop_2); -+ } ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+ bind(L_loop_1); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ sub(len, len, 1); -+ beqz(len, L_end); -+ j(L_loop_1); -+ -+ -+ bind(L_loop_2); -+ Label L_one; -+ sub(len, len, 1); -+ bltz(len, L_end); -+ sub(len, len, 1); -+ bltz(len, L_one); -+ -+ sub(in, in, 8); -+ ld(tmp1, Address(in, 0)); -+ ror_imm(tmp1, tmp1, 32); // convert to little-endian -+ -+ const Register carry = out; -+ const Register src1_hi = t0; -+ const Register src1_lo = tmp2; -+ const Register src2 = t1; -+ -+ mulhu(src1_hi, k, tmp1); -+ mul(src1_lo, k, tmp1); -+ sub(offset, offset, 8); -+ ld(src2, Address(offset, 0)); -+ ror_imm(src2, src2, 32, tmp1); -+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); -+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian -+ sd(src1_lo, Address(offset, 0)); -+ j(L_loop_2); -+ -+ bind(L_one); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); ++ sub(sp, sp, ASIZE); + -+ bind(L_end); -+} ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+/** -+ * Multiply 32 bit by 32 bit first loop. 
-+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // long carry = 0; -+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { -+ // long product = (y[j] & LONG_MASK) * -+ // (x[xstart] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + -+ Label L_first_loop, L_first_loop_exit; ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ sub(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ j(L_first_loop); ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); + -+ bind(L_first_loop_exit); -+} ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ sub(idx, idx, 1); -+ bltz(idx, L_one_y); ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ bind(BMLOOPSTR1_LASTCMP); ++ 
bne(ch1, ch2, BMSKIP); + -+ sub(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); + -+ j(L_first_loop); ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + -+ bind(L_first_loop_exit); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); +} + -+/** -+ * Multiply 128 bit by 128. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) +{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // -+ -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; -+ -+ srli(jdx, idx, 2); ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ bind(L_third_loop); ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ sub(idx, idx, 4); ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ 
shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++ Register first = tmp3; + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ j(L_third_loop); ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+ bind(L_third_loop_exit); ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } + -+ Label L_check_1; -+ sub(idx, idx, 2); -+ bltz(idx, L_check_1); ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ 
sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+ bind(L_check_1); ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ andi(idx, idx, 0x1); -+ sub(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ bind(L_post_third_loop_done); ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ 
(this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); +} + -+/** -+ * Code for BigInteger::multiplyToLen() instrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) +{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); -+ -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+ Label L_multiply_64_or_128, L_done; ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+ sub(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+ const Register jdx = tmp1; ++ BLOCK_COMMENT("string_compare {"); + -+ if (AvoidUnalignedAccesses) { -+ // if x and y are both 8 bytes aligend. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_or_128); -+ } else { -+ j(L_multiply_64_or_128); ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // Compute the minimum of the string lengths and save the difference in result. 
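++  // As an illustration (not generated code): comparing "abcdef" with "abcq"
++  // stops at the first differing characters and returns 'd' - 'q', while
++  // comparing "abc" with "abcdef" finds the common prefix equal and the
++  // saved length difference 3 - 6 = -3 becomes the result.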
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+ Label L_second_loop_1; -+ bind(L_second_loop_1); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ bind(L_third_loop); -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ j(L_third_loop); ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. 
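++    // Illustration (not generated code): for an LL comparison of 12-byte
++    // strings, the first 8-byte load covers bytes [0, 8) and the final load
++    // below, anchored at the string end, covers bytes [4, 12); bytes [4, 8)
++    // are simply compared a second time, which is harmless because they are
++    // already known to be equal.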
++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+ j(L_second_loop_1); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } + -+ bind(L_multiply_64_or_128); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+ Label L_second_loop_2; -+ beqz(kdx, L_second_loop_2); ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + -+ Label L_carry; -+ sub(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ sub(kdx, kdx, 1); ++ bind(DONE); + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ BLOCK_COMMENT("} string_compare"); ++} + -+ // Second and third (nested) loops. 
-+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ bind(L_second_loop_2); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++ BLOCK_COMMENT("arrays_equals {"); + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ addi(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); + -+ sub(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); 
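The sll by tmp5 in the TAIL path above relies on tmp5 having been initialized to minus the array length in bits, and on RV64 using only the low six bits of a register shift amount: the shift discards exactly the bytes that lie beyond the end of the data before the zero test. A small sketch of the trick (illustrative, not the emitted code):

    #include <cstdint>

    // valid_bytes is in 1..8; bytes above it in the loaded words are garbage.
    static bool tail_equal(uint64_t w1, uint64_t w2, unsigned valid_bytes) {
      uint64_t diff = w1 ^ w2;
      unsigned shamt = (0u - valid_bytes * 8u) & 63u;  // == (64 - valid_bits) mod 64
      return (diff << shamt) == 0;                     // out-of-range bytes shifted out
    }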
++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); + -+ // z[i] = (int) carry; -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_2); ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); + -+ // Next infrequent code is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); + -+ bind(L_done); ++ BLOCK_COMMENT("} array_equals"); +} -+#endif // COMPILER2 + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseZbb) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 8 : 16; -+ mv(Rd, -step); -+ mv(tmp2, Rs); ++// Compare Strings + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); -+} ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) +{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ mv(tmp1, 0xFF000000); // first byte mask at lower word -+ andr(Rd, Rs, tmp1); -+ for (int i = 0; i < 2; i++) { -+ slli(Rd, Rd, wordSize); -+ srli(tmp1, tmp1, wordSize); -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); -+ } -+ slli(Rd, Rd, wordSize); -+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word -+ orr(Rd, Rd, tmp2); -+} ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ srli(Rs, Rs, 32); // only upper 32 bits are needed -+ inflate_lo32(Rd, Rs, tmp1, tmp2); -+} ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; ++ BLOCK_COMMENT("string_equals {"); + -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. 
It is expanded many times -+// in compiled code, so it is important to keep it short. ++ mv(result, false); + -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. -+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); + { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels1(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); + } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); + { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); + } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. 
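The SHORT path here (continued by TAIL01 just below) never loops: the residual byte count is decomposed into its 4/2/1 bits, so at most three fixed-size compares remain. A scalar C++ sketch of the same idea, assuming little-endian loads and illustrative only:

    #include <cstdint>
    #include <cstring>

    static bool short_equal(const uint8_t* a, const uint8_t* b, unsigned rem /* 0..7 */) {
      if (rem & 4) {
        uint32_t x, y;
        std::memcpy(&x, a, 4); std::memcpy(&y, b, 4);
        if (x != y) return false;
        a += 4; b += 4;
      }
      if (rem & 2) {
        uint16_t x, y;
        std::memcpy(&x, a, 2); std::memcpy(&y, b, 2);
        if (x != y) return false;
        a += 2; b += 2;
      }
      if (rem & 1) {
        if (*a != *b) return false;
      }
      return true;
    }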
-+#define SmallArraySize (18 * BytesPerLong) -+void MacroAssembler::zero_words(Register base, uint64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); -+ -+ BLOCK_COMMENT("zero_words {"); -+ -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } + -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ mv(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); + } -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); + } -+ BLOCK_COMMENT("} zero_words"); ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = +{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... 
-+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, + -+ assert_different_registers(base, cnt, value, t0, t1); ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; + -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+ beqz(cnt, fini); ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; + -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} + -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); ++} + -+ bind(fini); ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); +} + -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+#undef FCVT_SAFE ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } + -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} ++ frflags(t0); ++ beqz(t0, Done); + -+FCMP(float, s); -+FCMP(double, d); ++ // In case of NaNs ++ is_double ? 
fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); + -+#undef FCMP ++ bind(Done); ++} + -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { -+ assert_different_registers(addr, len, tmp1, t0, t1); ++#endif // COMPILER2 + -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 0000000000..c660bce437 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,966 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+ Label loop; -+ Label entry; ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" + -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... -+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. 
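On the minmax_FD routine that closes the .cpp changes above: RISC-V fmin/fmax return the non-NaN operand, while Java's Math.min/max must return NaN whenever either input is NaN, hence the fflags check after a dummy flt comparison and the fadd fallback. A hedged scalar sketch of the required semantics (not the stub itself):

    #include <cmath>

    static double java_math_min(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return a + b;              // any NaN input propagates, like the fadd_d fallback
      }
      return std::fmin(a, b);      // otherwise the hardware min/max result is correct
    }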
+ -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll -+ -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp1 always points to the end of the region we're about to zero -+ shadd(tmp1, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp1, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp1, tmp1, unroll * wordSize); -+ bnez(len, loop); -+} -+ -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseZba) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } -+ } -+ -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} -+ -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseZba && bits == 32) { -+ zext_w(dst, src); -+ return; -+ } -+ -+ if (UseZbb && bits == 16) { -+ zext_h(dst, src); -+ return; -+ } -+ -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} -+ -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseZbb) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } -+ -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); -+ } -+} -+ -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; -+ } -+ -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} -+ -+void MacroAssembler::load_constant_pool_cache(Register cpool, Register method) -+{ -+ ld(cpool, Address(method, Method::const_offset())); -+ ld(cpool, Address(cpool, ConstMethod::constants_offset())); -+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); -+} -+ -+void MacroAssembler::load_max_stack(Register dst, Register method) -+{ -+ ld(dst, Address(xmethod, Method::const_offset())); -+ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); -+} -+ -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. 
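The unrolled zero/fill loops on their way out of this hunk (zero_memory, fill_words) share one pattern: the remainder cnt % 8 is handled by computing a jump into the middle of an unrolled block of stores (the la/slli/sub/jr sequence), so no separate tail loop is needed. Roughly the following C++, with hypothetical names, captures the control flow:

    #include <cstdint>
    #include <cstddef>

    static void zero_words_sketch(uint64_t* p, size_t cnt) {
      size_t rem = cnt & 7;          // words not covered by the unrolled loop
      switch (rem) {                 // the jr(t1) lands on one of these cases
        case 7: p[6] = 0;            // fall through
        case 6: p[5] = 0;            // fall through
        case 5: p[4] = 0;            // fall through
        case 4: p[3] = 0;            // fall through
        case 3: p[2] = 0;            // fall through
        case 2: p[1] = 0;            // fall through
        case 1: p[0] = 0;            // fall through
        default: break;
      }
      p += rem;
      for (size_t done = rem; done < cnt; done += 8) {  // 8 stores per iteration
        for (int j = 0; j < 8; j++) {
          p[j] = 0;
        }
        p += 8;
      }
    }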
-+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} -+ -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} -+ -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); -+ } -+ } -+} -+ -+// An oop arg. Must pass a handle not the oop itself -+void MacroAssembler::object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle -+ -+ if (src.first()->is_stack()) { -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } -+ -+ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ bnez(t0, notZero1); -+ mv(rHandle, zr); -+ bind(notZero1); -+ } else { -+ -+ // Oop is in a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL -+ -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } -+ -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; -+ -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } -+ -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ beqz(rOop, isZero); -+ la(rHandle, Address(sp, offset)); -+ bind(isZero); -+ } else { -+ 
Label notZero2; -+ la(rHandle, Address(sp, offset)); -+ bnez(rOop, notZero2); -+ mv(rHandle, zr); -+ bind(notZero2); -+ } -+ } -+ -+ // If arg is on the stack then place it otherwise it is already in correct reg. -+ if (dst.first()->is_stack()) { -+ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} -+ -+// A float arg may have to do float reg int reg conversion -+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ lwu(tmp, Address(fp, reg2offset_in(src.first()))); -+ sw(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+// A long move -+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} -+ -+// A double move -+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+void MacroAssembler::rt_call(address dest, Register tmp) { -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ la_patchable(tmp, RuntimeAddress(dest), offset); -+ jalr(x1, tmp, offset); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -new file mode 100644 -index 000000000..a4d5ce0e0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,975 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+ -+#include "asm/assembler.inline.hpp" -+#include "code/vmreg.hpp" -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. -+ -+class MacroAssembler: public Assembler { ++class MacroAssembler: public Assembler { + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) { @@ -24277,12 +24306,38 @@ index 000000000..a4d5ce0e0 + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ + // Alignment -+ void align(int modulus); ++ void align(int modulus, int extra_offset = 0); + + // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happend asychronously ++ // because signal based thread suspend/resume could happen asynchronously. 
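The biased_locking_enter/exit declarations above implement HotSpot's biased-locking fast path: if the object's mark word already records the current thread as bias owner, the lock is taken without any atomic operation. A deliberately simplified sketch of that idea; the tag bits, owner mask and helper name here are illustrative, not the real markWord encoding, and epoch/revocation handling is omitted:

    #include <atomic>
    #include <cstdint>

    constexpr uintptr_t kBiasedTag = 0x5;                 // illustrative tag bits
    constexpr uintptr_t kOwnerMask = ~uintptr_t(0xFF);    // illustrative owner field

    static bool try_biased_lock(std::atomic<uintptr_t>& mark, uintptr_t self_thread) {
      uintptr_t m = mark.load(std::memory_order_relaxed);
      if ((m & 0xFF) == kBiasedTag && (m & kOwnerMask) == self_thread) {
        return true;                                      // already biased to us: no CAS
      }
      uintptr_t expected = kBiasedTag;                    // anonymously biased, unowned
      uintptr_t desired  = self_thread | kBiasedTag;      // claim the bias for this thread
      return mark.compare_exchange_strong(expected, desired);
    }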
+ void enter() { + addi(sp, sp, - 2 * wordSize); + sd(ra, Address(sp, wordSize)); @@ -24415,7 +24470,7 @@ index 000000000..a4d5ce0e0 + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, + Address src, Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register tmp2, Register tmp3); ++ Register src, Register tmp1, Register thread_tmp); + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); @@ -24435,7 +24490,7 @@ index 000000000..a4d5ce0e0 + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + + void store_klass_gap(Register dst, Register src); + @@ -24444,6 +24499,8 @@ index 000000000..a4d5ce0e0 + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); ++ + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (linke NULL) into a Register by giving + // the compiler two choices it can't resolve @@ -24459,6 +24516,7 @@ index 000000000..a4d5ce0e0 + + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register rs1, Register rs2, @@ -24481,22 +24539,29 @@ index 000000000..a4d5ce0e0 + RegisterOrConstant vtable_index, + Register method_result); + ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset); ++ + // allocation -+ void eden_allocate( ++ void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails + bool is_far = false + ); -+ void tlab_allocate( ++ ++ void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + @@ -24516,11 +24581,11 @@ index 000000000..a4d5ce0e0 + + // The reset of the type cehck; must be wired to a corresponding fast path. 
+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure); @@ -24580,20 +24645,14 @@ index 000000000..a4d5ce0e0 + + void unimplemented(const char* what = ""); + -+ void should_not_reach_here() { stop("should not reach here"); } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ return RegisterOrConstant(tmp); -+ } ++ void should_not_reach_here() { stop("should not reach here"); } + + static address target_addr_for_insn(address insn_addr); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target) ; -+ void pd_patch_instruction(address branch, address target) { ++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { @@ -24619,12 +24678,9 @@ index 000000000..a4d5ce0e0 + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: -+ // enum used for riscv--x86 linkage to define return type of x86 function -+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; -+ + // Standard pseudoinstruction + void nop(); -+ void mv(Register Rd, Register Rs) ; ++ void mv(Register Rd, Register Rs); + void notr(Register Rd, Register Rs); + void neg(Register Rd, Register Rs); + void negw(Register Rd, Register Rs); @@ -24671,11 +24727,11 @@ index 000000000..a4d5ce0e0 + void fsflagsi(unsigned imm); + + void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); + void blez(Register Rs, const address &dest); + void bgez(Register Rs, const address &dest); + void bltz(Register Rs, const address &dest); + void bgtz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); + void la(Register Rd, Label &label); + void la(Register Rd, const address &dest); + void la(Register Rd, const Address &adr); @@ -24705,11 +24761,25 @@ index 000000000..a4d5ce0e0 + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); -+ static RegSet call_clobbered_registers(); -+ void push_call_clobbered_registers(); -+ void pop_call_clobbered_registers(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. 
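The fast-path/slow-path pair for the type check above amounts to: try the one-element secondary super cache, then linearly scan the secondary supers array (the repne_scan helper), and refresh the cache on a hit; the primary-supers depth check of the fast path is omitted here. A compact sketch with made-up types (KlassSketch is not the real Klass):

    #include <vector>

    struct KlassSketch {
      const KlassSketch*              secondary_super_cache = nullptr;
      std::vector<const KlassSketch*> secondary_supers;
    };

    static bool is_subtype_of(KlassSketch* sub, const KlassSketch* super) {
      if (sub == super || sub->secondary_super_cache == super) {
        return true;                                        // handled by the fast path
      }
      for (const KlassSketch* s : sub->secondary_supers) {   // slow path: linear scan
        if (s == super) {
          sub->secondary_super_cache = super;               // remember for next time
          return true;
        }
      }
      return false;
    }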
++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); ++ ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } ++ ++ void pusha(); ++ void popa(); ++ void push_CPU_state(); ++ void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); @@ -24721,8 +24791,6 @@ index 000000000..a4d5ce0e0 + } + + // mv -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } @@ -24733,6 +24801,7 @@ index 000000000..a4d5ce0e0 + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); + void mv(Register Rd, RegisterOrConstant src); + + // logic @@ -24740,26 +24809,6 @@ index 000000000..a4d5ce0e0 + void orrw(Register Rd, Register Rs1, Register Rs2); + void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); -+ -+ // support for argument shuffling -+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset); -+ -+ void rt_call(address dest, Register tmp = t0); -+ + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend @@ -24770,14 +24819,12 @@ index 000000000..a4d5ce0e0 + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); + void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+ // Support for serializing memory accesses between threads -+ void serialize_memory(Register thread, Register tmp1, Register tmp2); -+ + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); + void cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, @@ -24817,39 +24864,14 @@ index 000000000..a4d5ce0e0 + void atomic_xchgwu(Register prev, Register newv, Register 
addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be t0 or t1 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); -+ + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + -+ //atomic -+ void atomic_incw(Register counter_addr, Register tmp1); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. -+ void far_call(Address entry, Register tmp = t0); -+ void far_jump(Address entry, Register tmp = t0); ++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + + static int far_branch_size() { + if (far_branches()) { @@ -24864,8 +24886,8 @@ index 000000000..a4d5ce0e0 + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); -+ sub(t1, sp, offset); -+ sd(zr, Address(t1)); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); + } + + void la_patchable(Register reg1, const Address &dest, int32_t &offset); @@ -24873,123 +24895,45 @@ index 000000000..a4d5ce0e0 + virtual void _call_Unimplemented(address call_site) { + mv(t1, call_site); + } -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) -+ -+#ifdef COMPILER2 -+ void spill(Register Rx, bool is64, int offset) { -+ is64 ? sd(Rx, Address(sp, offset)) -+ : sw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? fsd(Rx, Address(sp, offset)) -+ : fsw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(Vx, t0); -+ } -+ -+ void unspill(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lw(Rx, Address(sp, offset)); -+ } -+ -+ void unspillu(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lwu(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? 
fld(Rx, Address(sp, offset)) -+ : flw(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(Vx, t0); -+ } -+ -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, -+ int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); -+ } + -+#endif // COMPILER2 ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ // Return: the call PC -+ address trampoline_call(Address entry); ++ ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); -+ // Support for memory inc/dec -+ // n.b. increment/decrement calls with an Address destination will -+ // need to use a scratch register to load the value to be -+ // incremented. increment/decrement calls which add or subtract a -+ // constant value other than sign-extended 12-bit immediate will need -+ // to use a 2nd scratch register to hold the constant. so, an address -+ // increment/decrement may trash both t0 and t1. -+ -+ void increment(const Address dst, int64_t value = 1); -+ void incrementw(const Address dst, int32_t value = 1); -+ -+ void decrement(const Address dst, int64_t value = 1); -+ void decrementw(const Address dst, int32_t value = 1); -+ void cmpptr(Register src1, Address src2, Label& equal); -+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop -+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+#ifdef COMPILER2 -+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); + -+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+ void string_equals(Register a1, Register a2, Register result, Register cnt1, -+ int elem_size); -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, int ae); -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, 
-+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ void compute_index(Register str1, Register trailing_zero, Register match_mask, ++ void cmpptr(Register src1, Address src2, Label& equal); ++ ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); + void compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2); ++ ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry = t0); -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2); ++ Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, @@ -25008,49 +24952,21 @@ index 000000000..a4d5ce0e0 + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); -+#endif // COMPILER2 ++#endif ++ + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ ++ + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ void zero_words(Register base, uint64_t cnt); ++ ++ void zero_words(Register base, u_int64_t cnt); + address zero_words(Register ptr, Register cnt); + void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp1); ++ void zero_memory(Register addr, Register len, Register tmp); + -+ // shift left by shamt and add ++ // shift left by shamt and add + void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; -+ -+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); -+ -+ // cmp -+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false); -+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false); -+ -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ -+ // intrinsic methods implemented by vector instructions -+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm); -+ -+ void clear_array_v(Register base, 
Register cnt); -+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp); -+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp); -+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp); -+ -+ address has_negatives_v(Register ary, Register len, Register result, Register tmp); -+#endif -+ + // Here the float instructions with safe deal with some exceptions. + // e.g. convert from NaN, +Inf, -Inf to int, float, double + // will trigger exception, we need to deal with these situations @@ -25138,13 +25054,15 @@ index 000000000..a4d5ce0e0 + // if [src1 < src2], dst = -1; + void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+ void load_constant_pool_cache(Register cpool, Register method); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+ void load_max_stack(Register dst, Register method); ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + +private: -+ void load_prototype_header(Register dst, Register src); -+ void repne_scan(Register addr, Register value, Register count, Register tmp); + +#ifdef ASSERT + // Macro short-hand support to clean-up after a failed call to trampoline @@ -25155,10 +25073,11 @@ index 000000000..a4d5ce0e0 +#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) +#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) +#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space. + bool is_valid_riscv64_address(address addr) { -+ // sv48: must have bits 63-48 all equal to bit 47 ++ // sv48: must have bits 63–48 all equal to bit 47 + return ((uintptr_t)addr >> 47) == 0; + } + @@ -25178,47 +25097,146 @@ index 000000000..a4d5ce0e0 + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+#ifdef COMPILER2 -+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); -+#endif // COMPILER2 -+}; ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. 
-+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+ public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); -+}; -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 000000000..fc2b191c0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; ++ ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); ++ ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } ++ ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } ++ ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } ++ ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 0000000000..ef968ccd96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * @@ -25241,17 +25259,19 @@ index 000000000..fc2b191c0 +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + ++// Still empty. ++ +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp new file mode 100644 -index 000000000..d049193d4 +index 0000000000..fd907f77af --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,440 @@ +@@ -0,0 +1,450 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25280,9 +25300,11 @@ index 000000000..d049193d4 +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -25295,6 +25317,7 @@ index 000000000..d049193d4 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); + if (VerifyMethodHandles) { + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); @@ -25316,6 +25339,7 @@ index 000000000..d049193d4 +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { ++ assert_cond(_masm != NULL); + InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = t1; @@ -25338,12 +25362,13 @@ index 000000000..d049193d4 + BLOCK_COMMENT("} verify_klass"); +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(method == xmethod, "interpreter calling convention"); + Label L_no_such_method; + __ beqz(xmethod, L_no_such_method); @@ -25374,6 +25399,7 @@ index 000000000..d049193d4 + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. @@ -25399,7 +25425,7 @@ index 000000000..d049193d4 + sizeof(u2), /*is_signed*/ false); + Label L; + __ ld(t0, __ argument_address(temp2, -1)); -+ __ oop_equal(recv, t0, L); ++ __ beq(recv, t0, L); + __ ld(x10, __ argument_address(temp2, -1)); + __ ebreak(); + __ BIND(L); @@ -25412,6 +25438,7 @@ index 000000000..d049193d4 +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || @@ -25427,6 +25454,7 @@ index 000000000..d049193d4 + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) + // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away + Register argp = x13; // argument list ptr, live on error paths + Register mh = x11; // MH receiver; dies quickly and is recycled + @@ -25476,7 +25504,6 @@ index 000000000..d049193d4 + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ + } else { + // Adjust argument list by popping the trailing MemberName argument. 
+ Register recv = noreg; @@ -25499,6 +25526,7 @@ index 000000000..d049193d4 + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = x7; @@ -25592,7 +25620,8 @@ index 000000000..d049193d4 + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); + break; + -+ case vmIntrinsics::_linkToVirtual: { ++ case vmIntrinsics::_linkToVirtual: ++ { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + @@ -25619,7 +25648,8 @@ index 000000000..d049193d4 + break; + } + -+ case vmIntrinsics::_linkToInterface: { ++ case vmIntrinsics::_linkToInterface: ++ { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { @@ -25671,7 +25701,7 @@ index 000000000..d049193d4 + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, -+ oop mh, ++ oopDesc* mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + @@ -25690,13 +25720,12 @@ index 000000000..d049193d4 +#endif //PRODUCT diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp new file mode 100644 -index 000000000..8ed69efe8 +index 0000000000..65493eba76 --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,58 @@ +@@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25754,13 +25783,13 @@ index 000000000..8ed69efe8 + bool for_compiler_entry); diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp new file mode 100644 -index 000000000..4b1573130 +index 0000000000..27011ad128 --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,404 @@ +@@ -0,0 +1,417 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25786,10 +25815,12 @@ index 000000000..4b1573130 + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" @@ -25981,24 +26012,20 @@ index 000000000..4b1573130 + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. 
+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ if(cb != NULL) { -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; + } + } -+ } else { -+ ShouldNotReachHere(); + } +} + @@ -26040,6 +26067,16 @@ index 000000000..4b1573130 + return dest; +}; + ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); ++ ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} ++ +//------------------------------------------------------------------- + +address NativeGeneralJump::jump_destination() const { @@ -26061,6 +26098,7 @@ index 000000000..4b1573130 +} + +bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr @@ -26077,6 +26115,10 @@ index 000000000..4b1573130 + *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} ++ +//------------------------------------------------------------------- + +// MT-safe inserting of a jump over a jump or a nop (used by @@ -26164,14 +26206,14 @@ index 000000000..4b1573130 +} diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp new file mode 100644 -index 000000000..e8a4e0a46 +index 0000000000..2e5c84ee3b --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,561 @@ +@@ -0,0 +1,555 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -26215,12 +26257,15 @@ index 000000000..e8a4e0a46 +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
+ ++class NativeCall; ++ +class NativeInstruction { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { -+ instruction_size = 4 ++ instruction_size = 4, ++ compressed_instruction_size = 2, + }; + + juint encoding() const { @@ -26246,7 +26291,7 @@ index 000000000..e8a4e0a46 + static bool is_slli_shift_at(address instr, uint32_t shift) { + assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + @@ -26341,9 +26386,10 @@ index 000000000..e8a4e0a46 + // load + static bool check_load_pc_relative_data_dependency(address instr) { + address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ address load = auipc + instruction_size; + -+ return extract_rs1(last_instr) == extract_rd(auipc); ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + + static bool is_movptr_at(address instr); @@ -26364,6 +26410,7 @@ index 000000000..e8a4e0a46 + inline bool is_jump_or_nop(); + bool is_safepoint_poll(); + bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } @@ -26422,27 +26469,24 @@ index 000000000..e8a4e0a46 + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + -+ void set_destination(address dest) { -+ if (is_jal()) { -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "should be aligned"); -+ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ return; -+ } -+ ShouldNotReachHere(); ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); + } + -+ void verify_alignment() { ; } -+ void verify(); -+ void print(); ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address addr); @@ -26478,18 +26522,14 @@ index 000000000..e8a4e0a46 +inline NativeCall* nativeCall_at(address addr) { + assert_cond(addr != NULL); + NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif 
++ DEBUG_ONLY(call->verify()); + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + assert_cond(return_address != NULL); + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif ++ DEBUG_ONLY(call->verify()); + return call; +} + @@ -26529,7 +26569,7 @@ index 000000000..e8a4e0a46 + } + + intptr_t data() const; -+ void set_data(intptr_t x); ++ void set_data(intptr_t x); + + void flush() { + if (!maybe_cpool_ref(instruction_address())) { @@ -26537,8 +26577,8 @@ index 000000000..e8a4e0a46 + } + } + -+ void verify(); -+ void print(); ++ void verify(); ++ void print(); + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); @@ -26548,55 +26588,53 @@ index 000000000..e8a4e0a46 +inline NativeMovConstReg* nativeMovConstReg_at(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif ++ DEBUG_ONLY(test->verify()); + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif ++ DEBUG_ONLY(test->verify()); + return test; +} + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. ++// RISCV should not use C1 runtime patching, but still implement ++// NativeMovRegMem to keep some compilers happy. +class NativeMovRegMem: public NativeInstruction { + public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; -+ } ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++ int instruction_start() const { return instruction_offset; } + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; -+ } ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + + int offset() const; + + void set_offset(int x); + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++ void add_offset_in_bytes(int add_offset) { ++ set_offset(offset() + add_offset); ++ } + + void verify(); + void print(); + + private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); ++ inline friend NativeMovRegMem* nativeMovRegMem_at(address addr); +}; + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; ++inline NativeMovRegMem* nativeMovRegMem_at(address addr) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(addr - NativeMovRegMem::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; +} + +class NativeJump: public NativeInstruction { @@ -26611,15 +26649,13 @@ index 000000000..e8a4e0a46 + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(instruction_size); } + address jump_destination() const; ++ void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + -+ // Unit testing stuff -+ 
static void test() {} -+ + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry @@ -26629,9 +26665,7 @@ index 000000000..e8a4e0a46 + +inline NativeJump* nativeJump_at(address addr) { + NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif ++ DEBUG_ONLY(jump->verify()); + return jump; +} + @@ -26699,7 +26733,9 @@ index 000000000..e8a4e0a46 + // 3). check if the offset in ld[31:20] equals the data_offset + assert_cond(addr != NULL); + const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && @@ -26731,13 +26767,12 @@ index 000000000..e8a4e0a46 +#endif // CPU_RISCV_NATIVEINST_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp new file mode 100644 -index 000000000..04a36c1c7 +index 0000000000..fef8ca9b64 --- /dev/null +++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,46 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -26770,10 +26805,7 @@ index 000000000..04a36c1c7 + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. -+ // Since there is none, we just return NULL. -+ // See registerMap_riscv.hpp for an example of grabbing registers -+ // from register save areas of a standard layout. -+ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg reg) const { return NULL; } + + // no PD state to clear or copy: + void pd_clear() {} @@ -26783,14 +26815,14 @@ index 000000000..04a36c1c7 +#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp new file mode 100644 -index 000000000..b30c1b107 +index 0000000000..583f67573c --- /dev/null +++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,193 @@ +@@ -0,0 +1,192 @@ +/* + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -26815,7 +26847,6 @@ index 000000000..b30c1b107 + +#include "precompiled.hpp" +#include "asm/assembler.hpp" -+#include "asm/macroAssembler.inline.hpp" +#include "asm/register.hpp" +#include "interp_masm_riscv.hpp" +#include "register_riscv.hpp" @@ -26982,14 +27013,13 @@ index 000000000..b30c1b107 +REGISTER_DEFINITION(Register, t2); diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 000000000..76215ef2a +index 0000000000..ef60cb3bb0 --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,69 @@ +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27017,18 +27047,14 @@ index 000000000..76215ef2a + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; ++ +const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; -+ -+ +const char* RegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", + "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", + "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", + "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" @@ -27037,7 +27063,7 @@ index 000000000..76215ef2a +} + +const char* FloatRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", @@ -27047,7 +27073,7 @@ index 000000000..76215ef2a +} + +const char* VectorRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -27057,13 +27083,12 @@ index 000000000..76215ef2a +} diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp new file mode 100644 -index 000000000..8beba6776 +index 0000000000..f64a06eb89 --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,337 @@ +@@ -0,0 +1,381 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27121,31 +27146,49 @@ index 000000000..8beba6776 + public: + enum { + number_of_registers = 32, -+ number_of_byte_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // derived registers, offsets, and addresses -+ Register successor() const { return as_Register(encoding() + 1); } ++ const Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; -+ int encoding_nocheck() const { return (intptr_t)this; } ++ ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } + + // Return the bit which represents this register. This is intended + // to be ORed into a bitmask: for usage see class RegSet below. -+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } +}; + -+// The integer registers of the riscv architecture ++// The integer registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + @@ -27195,23 +27238,41 @@ index 000000000..8beba6776 + public: + enum { + number_of_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } +}; + +// The float registers of the RISCV architecture @@ -27259,7 +27320,7 @@ index 000000000..8beba6776 + return (VectorRegister)(intptr_t) encoding; +} + -+// The implementation of vector registers for riscv-v ++// The implementation of vector registers for RVV +class VectorRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -27270,15 +27331,15 @@ index 000000000..8beba6776 + // construction + inline friend VectorRegister as_VectorRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses + VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +}; @@ -27331,29 +27392,26 @@ index 000000000..8beba6776 + // it's optoregs. 
+ + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; -+ static const int max_vpr; +}; + +// A set of registers +class RegSet { + uint32_t _bitset; + -+public: + RegSet(uint32_t bitset) : _bitset(bitset) { } + ++public: ++ + RegSet() : _bitset(0) { } + + RegSet(Register r1) : _bitset(r1->bit()) { } + -+ ~RegSet() {} -+ + RegSet operator+(const RegSet aSet) const { + RegSet result(_bitset | aSet._bitset); + return result; @@ -27369,6 +27427,11 @@ index 000000000..8beba6776 + return *this; + } + ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ + static RegSet of(Register r1) { + return RegSet(r1); + } @@ -27388,26 +27451,33 @@ index 000000000..8beba6776 + static RegSet range(Register start, Register end) { + uint32_t bits = ~0; + bits <<= start->encoding(); -+ bits <<= (31 - end->encoding()); -+ bits >>= (31 - end->encoding()); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + + return RegSet(bits); + } + + uint32_t bits() const { return _bitset; } ++ ++private: ++ ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? as_Register(exact_log2(first)) : noreg; ++ } +}; + +#endif // CPU_RISCV_REGISTER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp new file mode 100644 -index 000000000..f49fd6439 +index 0000000000..047ea2276c --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ +@@ -0,0 +1,112 @@ +/* -+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27444,10 +27514,9 @@ index 000000000..f49fd6439 + + int bytes; + -+ switch(type()) { ++ switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when immediate == false + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); @@ -27519,13 +27588,12 @@ index 000000000..f49fd6439 +} diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 000000000..c30150e0a +index 0000000000..840ed935d8 --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,45 @@ +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27557,8 +27625,8 @@ index 000000000..c30150e0a + enum { + // Relocations are byte-aligned. + offset_unit = 1, -+ // We don't use format(). -+ format_width = 0 ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 + }; + + public: @@ -27570,14 +27638,14 @@ index 000000000..c30150e0a +#endif // CPU_RISCV_RELOCINFO_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad new file mode 100644 -index 000000000..137e9b7c7 +index 0000000000..02d6167629 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10685 @@ +@@ -0,0 +1,10280 @@ +// -+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it @@ -27644,8 +27712,8 @@ index 000000000..137e9b7c7 +// +// follow the C1 compiler in making registers +// -+// x7, x9-x17, x28-x31 volatile (caller save) -+// x0-x4, x8, x27 system (no save, no allocate) ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) +// x5-x6 non-allocatable (so we can use them as temporary regs) + +// @@ -27658,8 +27726,8 @@ index 000000000..137e9b7c7 + +reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr +reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp +reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); +reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp @@ -27803,179 +27871,6 @@ index 000000000..137e9b7c7 +reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +// ---------------------------- -+// Vector Registers -+// ---------------------------- -+ -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+// for Java use vector registers v0-v31 are always save on call just -+// as the platform ABI treats v0-v31 as caller save. 
-+ -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -+ -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -+ -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -+ -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -+ -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -+ -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -+ -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -+ -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -+ -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -+ -+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -+ -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -+ -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -+ -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -+ -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
-+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -+ -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -+ -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -+ -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -+ -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -+ -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -+ -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -+ -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -+ -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -+ -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); -+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -+ -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -+ -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -+ -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -+ -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -+ -+reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -+ -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -+ -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -+ -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -+ -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); -+ -+// ---------------------------- +// Special Registers +// ---------------------------- + @@ -28073,49 +27968,14 @@ index 000000000..137e9b7c7 + F27, F27_H, +); + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); -+ -+alloc_class chunk3(RFLAGS); ++alloc_class chunk2(RFLAGS); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. 
+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + @@ -28323,7 +28183,7 @@ index 000000000..137e9b7c7 +); + +// Class for link register -+reg_class lr_reg( ++reg_class ra_reg( + R1, R1_H +); + @@ -28406,41 +28266,6 @@ index 000000000..137e9b7c7 + F31, F31_H +); + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); -+ +// Class for 64 bit register f0 +reg_class f0_reg( + F0, F0_H @@ -28461,31 +28286,6 @@ index 000000000..137e9b7c7 + F3, F3_H +); + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); -+ -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); -+ -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); -+ -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); -+ -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); -+ +// class for condition codes +reg_class reg_flags(RFLAGS); +%} @@ -28516,7 +28316,7 @@ index 000000000..137e9b7c7 + int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload + int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore + int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call ++ int_def BRANCH_COST ( 200, 2 * DEFAULT_COST); // branch, jmp, call + int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul + int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi + int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi @@ -28524,6 +28324,7 @@ index 000000000..137e9b7c7 + int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd + int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv + int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + + @@ -28535,7 +28336,6 @@ index 000000000..137e9b7c7 +source_hpp %{ + +#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" @@ -28584,85 +28384,62 @@ index 000000000..137e9b7c7 + } +}; + -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); -+ 
+bool is_CAS(int opcode, bool maybe_volatile); + +// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_exclusive(const Node *load); ++bool needs_acquiring_load_reserved(const Node *load); + ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); + -+// predicate using the temp register for decoding klass -+bool maybe_use_tmp_register_decoding_klass(); ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + +source %{ + -+ // Derived RegMask with conditionally allocatable registers ++// Derived RegMask with conditionally allocatable registers + -+ RegMask _ANY_REG32_mask; -+ RegMask _ANY_REG_mask; -+ RegMask _PTR_REG_mask; -+ RegMask _NO_SPECIAL_REG32_mask; -+ RegMask _NO_SPECIAL_REG_mask; -+ RegMask _NO_SPECIAL_PTR_REG_mask; ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+ void reg_mask_init() { ++void reg_mask_init() { + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } -+ -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); + } + -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) -+ // between an object put and the associated card mark when we are using -+ // CMS without 
conditional card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); + } -+ -+ // a storestore is unnecesary in all other cases -+ -+ return true; +} + +// is_CAS(int opcode, bool maybe_volatile) @@ -28671,12 +28448,16 @@ index 000000000..137e9b7c7 +// values otherwise false. +bool is_CAS(int opcode, bool maybe_volatile) +{ -+ switch(opcode) { ++ switch (opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: @@ -28685,10 +28466,6 @@ index 000000000..137e9b7c7 + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: -+#if INCLUDE_SHENANDOAHGC -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+#endif + return true; + case Op_CompareAndExchangeI: + case Op_CompareAndExchangeN: @@ -28698,7 +28475,7 @@ index 000000000..137e9b7c7 + case Op_CompareAndExchangeP: + case Op_WeakCompareAndSwapB: + case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapI: + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: @@ -28711,12 +28488,9 @@ index 000000000..137e9b7c7 +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_exclusive(const Node *n) ++bool needs_acquiring_load_reserved(const Node *n) +{ + assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ if (UseBarriersForVolatile) { -+ return false; -+ } + + LoadStoreNode* ldst = n->as_LoadStore(); + if (n != NULL && is_CAS(n->Opcode(), false)) { @@ -28728,11 +28502,28 @@ index 000000000..137e9b7c7 + return true; +} + -+bool maybe_use_tmp_register_decoding_klass() { -+ return !UseCompressedOops && -+ Universe::narrow_klass_base() != NULL && -+ Universe::narrow_klass_shift() != 0; ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; +} ++ +#define __ _masm. 
+ +// advance declarations for helper functions to convert register @@ -28751,14 +28542,13 @@ index 000000000..137e9b7c7 + +int MachCallStaticJavaNode::ret_addr_offset() +{ -+ // call should be a simple jal -+ int off = 4; -+ return off; ++ // jal ++ return 1 * NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() +{ -+ return 28; // movptr, jal ++ return 7 * NativeInstruction::instruction_size; // movptr, jal +} + +int MachCallRuntimeNode::ret_addr_offset() { @@ -28766,14 +28556,13 @@ index 000000000..137e9b7c7 + // jal(addr) + // or with far branches + // jal(trampoline_stub) -+ // for real runtime callouts it will be five instructions ++ // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -+ // la(t0, RuntimeAddress(addr)) -+ // addi(sp, sp, -2 * wordSize) -+ // sd(zr, Address(sp)) -+ // sd(t1, Address(sp, wordSize)) -+ // jalr(t0) ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr + CodeBlob *cb = CodeCache::find_blob(_entry_point); + if (cb != NULL) { + return 1 * NativeInstruction::instruction_size; @@ -28782,6 +28571,34 @@ index 000000000..137e9b7c7 + } +} + ++// ++// Compute padding required for nodes which need alignment ++// ++ ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} ++ ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} ++ +// Indicate if the safepoint node needs the polling page as an input + +// the shared code plants the oop data at the start of the generated @@ -28807,6 +28624,7 @@ index 000000000..137e9b7c7 + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); +} + @@ -28824,13 +28642,14 @@ index 000000000..137e9b7c7 + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); + } + } + + uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * NativeInstruction::instruction_size; ++ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); + } + +//============================================================================= @@ -28871,10 +28690,10 @@ index 000000000..137e9b7c7 + st->print("# stack bang size=%d\n\t", framesize); + } + ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); -+ st->print("sd fp, [sp, #%d]", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]", - wordSize); -+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } +} +#endif + @@ -28885,15 +28704,15 @@ index 000000000..137e9b7c7 + + // n.b. frame size includes space for return pc and fp + const int framesize = C->frame_size_in_bytes(); -+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + assert_cond(C != NULL); ++ + int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize) && UseStackBanging) { ++ if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + @@ -28949,7 +28768,7 @@ index 000000000..137e9b7c7 + if (do_polling() && C->is_method_compilation()) { + st->print("# touch polling page\n\t"); + st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); ++ st->print("ld zr, [t0]"); + } +} +#endif @@ -28986,6 +28805,9 @@ index 000000000..137e9b7c7 + return MachNode::pipeline_class(); +} + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? +int MachEpilogNode::safepoint_offset() const { + assert(do_polling(), "no return for this epilog node"); + return 4; @@ -28995,7 +28817,7 @@ index 000000000..137e9b7c7 + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. -+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + @@ -29016,13 +28838,7 @@ index 000000000..137e9b7c7 + return rc_float; + } + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } -+ -+ // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. 
+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; @@ -29060,32 +28876,9 @@ index 000000000..137e9b7c7 + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + -+ if (bottom_type() == NULL) { -+ ShouldNotReachHere(); -+ } else if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { ++ if (cbuf != NULL) { + MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy @@ -29167,17 +28960,7 @@ index 000000000..137e9b7c7 + } else { + st->print("%s", Matcher::regName[dst_lo]); + } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } ++ st->print("\t# spill size = %d", is64 ? 64 : 32); + } + + return 0; @@ -29249,14 +29032,16 @@ index 000000000..137e9b7c7 + assert_cond(st != NULL); + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { -+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + } -+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); +} +#endif + @@ -29334,15 +29119,10 @@ index 000000000..137e9b7c7 + } + + switch (opcode) { -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_HasNegatives: -+ return UseRVV; -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; ++ + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: @@ -29355,17 +29135,12 @@ index 000000000..137e9b7c7 + +// Identify extra cases that we might want to provide match rules for vector nodes and +// other intrinsics guarded with vector length (vlen). 
-+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } -+ -+ return op_vec_supported(opcode); ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ return false; +} + +const bool Matcher::has_predicated_vectors(void) { -+ return false; // not supported -+ ++ return false; +} + +const int Matcher::float_pressure(int default_pressure_threshold) { @@ -29414,11 +29189,6 @@ index 000000000..137e9b7c7 + +// Vector width in bytes. +const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; -+ } + return 0; +} + @@ -29432,34 +29202,13 @@ index 000000000..137e9b7c7 + +// Vector ideal reg. +const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; -+ } -+ + ShouldNotReachHere(); + return 0; +} + +const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ switch(size) { -+ case 8: return Op_VecD; -+ case 16: return Op_VecX; -+ default: -+ if (size == vector_width_in_bytes(T_BYTE)) { -+ return Op_VecA; -+ } -+ } -+ ShouldNotReachHere(); -+ return 0; -+} -+ -+const bool Matcher::supports_scalable_vector() { -+ return UseRVV; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; +} + +// AES support not yet implemented @@ -29467,7 +29216,7 @@ index 000000000..137e9b7c7 + return false; +} + -+// riscv supports misaligned vectors store/load. ++// RISC-V supports misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return true; +} @@ -29638,42 +29387,7 @@ index 000000000..137e9b7c7 +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ assert_cond(m != NULL); -+ if (clone_base_plus_offset_address(m, mstack, address_visited)) { -+ return true; -+ } -+ -+ Node *off = m->in(AddPNode::Offset); -+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && -+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(2), Visit); -+ Node *conv = off->in(1); -+ if (conv->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(conv)) { -+ address_visited.set(conv->_idx); // Flag as address_visited -+ mstack.push(conv->in(1), Pre_Visit); -+ } else { -+ mstack.push(conv, Pre_Visit); -+ } -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } else if (off != NULL && off->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? 
-+ !is_visited(off)) { -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(1), Pre_Visit); -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } -+ return false; ++ return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { @@ -29714,9 +29428,10 @@ index 000000000..137e9b7c7 + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, con); ++ __ li(dst_reg, con); + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ @@ -29733,15 +29448,16 @@ index 000000000..137e9b7c7 + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ mv(dst_reg, $src$$constant); ++ __ li(dst_reg, $src$$constant); + } + } + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, 1); ++ __ li(dst_reg, 1); + %} + + enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ @@ -29792,42 +29508,42 @@ index 000000000..137e9b7c7 + } + %} + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ 
true); + %} + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, @@ -29845,7 +29561,7 @@ index 000000000..137e9b7c7 + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; -+ switch($cmp$$cmpcode) { ++ switch ($cmp$$cmpcode) { + case(BoolTest::ge): + __ j(*L); + break; @@ -29879,7 +29595,7 @@ index 000000000..137e9b7c7 + + __ bind(miss); + if (!$primary) { -+ __ mv(cr_reg, 1); ++ __ li(cr_reg, 1); + } + + __ bind(done); @@ -29893,7 +29609,7 @@ index 000000000..137e9b7c7 + assert_cond(addr != NULL); + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; @@ -29902,19 +29618,19 @@ index 000000000..137e9b7c7 + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec)); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } ++ + // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } -+ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ @@ -29964,19 +29680,19 @@ index 000000000..137e9b7c7 + %} + + // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ + MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+ // Load markOop from object into displaced_header. ++ // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Always do locking in runtime. @@ -29986,7 +29702,6 @@ index 000000000..137e9b7c7 + } + + if (UseBiasedLocking && !UseOptoBiasInlining) { -+ // ignore slow case here + __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); + } + @@ -29996,15 +29711,15 @@ index 000000000..137e9b7c7 + __ bnez(t0, object_has_monitor); + } + -+ // Set tmp to be (markOop of object | UNLOCK_VALUE). ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) 
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ // Compare object markOop with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markOop. -+ // On failure disp_hdr contains the possibly locked markOop. ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. + __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/disp_hdr); + __ mv(flag, zr); @@ -30017,9 +29732,9 @@ index 000000000..137e9b7c7 + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the -+ // markOop of object (disp_hdr) with the stack pointer. ++ // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); -+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. @@ -30038,7 +29753,7 @@ index 000000000..137e9b7c7 + // Try to CAS m->owner from NULL to current thread. + __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for @@ -30052,5133 +29767,4015 @@ index 000000000..137e9b7c7 + %} + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ + MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); -+ -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); -+ -+ // Handle existing monitor. 
-+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } -+ -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markOop of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ __ j(cont); -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // Handle existing monitor. -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); -+ -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } -+ -+ __ bind(cont); -+ %} -+ -+ // arithmetic encodings -+ -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} -+ -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); -+ %} -+ -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} -+ -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); -+ %} -+ -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} -+ -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); -+ %} -+ -+ enc_class riscv_enc_rethrow() %{ -+ MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} -+ -+ enc_class riscv_enc_ret() %{ -+ MacroAssembler _masm(&cbuf); -+ __ ret(); 
-+ %} -+ -+%} -+ -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. -+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. -+ -+frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); -+ -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. -+ -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); -+ -+ // Method Oop Register when calling interpreter. 
-+ interpreter_method_oop_reg(R31); -+ -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); -+ -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); -+ -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); -+ -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. -+ interpreter_frame_pointer(R8); -+ -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) -+ -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. -+ // RISCV needs two words for RA (return address) and FP (frame pointer). -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); -+ -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); -+ -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ -+ // Location of compiled Java return values. Same as C for now. 
-+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); -+ -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; -+ -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; -+ -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} -+ -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute -+ -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction -+ -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. 
-+ -+//----------Simple Operands---------------------------------------------------- -+ -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit zero -+operand immI0() -+%{ -+ predicate(n->get_int() == 0); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit unit increment -+operand immI_1() -+%{ -+ predicate(n->get_int() == 1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit unit decrement -+operand immI_M1() -+%{ -+ predicate(n->get_int() == -1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_le_4() -+%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_16() -+%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_24() -+%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_31() -+%{ -+ predicate(n->get_int() == 31); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_32() -+%{ -+ predicate(n->get_int() == 32); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_63() -+%{ -+ predicate(n->get_int() == 63); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_64() -+%{ -+ predicate(n->get_int() == 64); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit integer valid for add immediate -+operand immIAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit integer valid for sub immediate -+operand immISub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 5 bit signed value. 
-+operand immI5() -+%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 5 bit signed value (simm5) -+operand immL5() -+%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() -+%{ -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit zero -+operand immL0() -+%{ -+ predicate(n->get_long() == 0); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Pointer operands -+// Pointer Immediate -+operand immP() -+%{ -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// NULL Pointer Immediate -+operand immP0() -+%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() -+%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Card Table Byte Map Base -+operand immByteMapBase() -+%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Int Immediate: low 16-bit mask -+operand immI_16bits() -+%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Long Immediate: low 32-bit mask -+operand immL_32bits() -+%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit unit decrement -+operand immL_M1() -+%{ -+ predicate(n->get_long() == -1); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+ -+// 32 bit offset of pc in thread anchor -+ -+operand immL_pc_off() -+%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit integer valid for add immediate -+operand immLAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit integer valid for sub immediate -+operand immLSub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand immN() -+%{ -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Narrow NULL Pointer Immediate -+operand immN0() -+%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immNKlass() -+%{ -+ match(ConNKlass); -+ -+ op_cost(0); -+ format %{ %} -+ 
interface(CONST_INTER); -+%} -+ -+// Float and Double operands -+// Double Immediate -+operand immD() -+%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Float Immediate -+operand immF() -+%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Float Immediate: +0.0f. -+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immIOffset() -+%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immLOffset() -+%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Scale values -+operand immIScale() -+%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Integer 32 bit Register Operands -+operand iRegI() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 32 bit Register not Special -+operand iRegINoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R12 only -+operand iRegI_R12() -+%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R14 only -+operand iRegI_R14() -+%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register Operands -+operand iRegL() -+%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register not Special -+operand iRegLNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R28 only -+operand iRegL_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R29 only -+operand iRegL_R29() -+%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R30 only -+operand iRegL_R30() -+%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); 
-+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() -+%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R12() -+%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R10 only -+operand iRegL_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); -+ -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Double Register -+// Double register operands -+operand fRegD() -+%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); -+ -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V2() -+%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. -+operand indirect(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} -+ -+operand indOffI(iRegP reg, immIOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indOffL(iRegP reg, immLOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indirectN(iRegN reg) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} -+ -+operand indOffIN(iRegN reg, immIOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indOffLN(iRegN reg, immLOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+ -+//----------Special Memory 
Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. -+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotD(sRegD reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+// Special operand allowing long args to int ops to be truncated for free -+ -+operand iRegL2I(iRegL reg) %{ ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ op_cost(0); ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+ match(ConvL2I reg); ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } + -+ format %{ "l2i($reg)" %} ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ interface(REG_INTER) -+%} ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, tmp, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. 
-+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ __ j(cont); + ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() -+%{ -+ match(Bool); ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + -+ format %{ "" %} ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. ++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ __ bind(cont); + %} -+%} + -+// used for unsigned integral comparisons -+operand cmpOpU() -+%{ -+ match(Bool); ++ // arithmetic encodings + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); + %} -+%} + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ 
enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); + %} -+%} + -+operand cmpOpULtGe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); + %} -+%} + -+operand cmpOpUEqNeLeGt() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ enc_class riscv_enc_rethrow() %{ ++ MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} ++ ++ enc_class riscv_enc_ret() %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); + %} ++ +%} + ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. 
++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// (the latter is true on Intel but is it false on RISCV?) ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); -+%} ++ // Inline Cache Register or methodOop for I2C. ++ inline_cache_reg(R31); + -+// Special Registers ++ // Method Oop Register when calling interpreter. ++ interpreter_method_oop_reg(R31); + -+// Method Register -+operand inline_cache_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset); + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. 
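The frame entries just below derive their slot counts from VMRegImpl::slots_per_word. Restated as a compilable sketch under the LP64 assumptions named in the nearby comments (8-byte machine words, 4-byte C2 stack slots):

    #include <cassert>

    constexpr int wordSize        = 8;                          // bytes per machine word (assumed LP64)
    constexpr int stack_slot_size = 4;                          // bytes per C2 stack slot
    constexpr int slots_per_word  = wordSize / stack_slot_size; // == 2

    int main() {
      // sync_stack_slots(1 * VMRegImpl::slots_per_word): one word per lock.
      assert(1 * slots_per_word == 2);                     // 2 slots == 8 bytes per monitor
      // in_preserve_stack_slots(2 * VMRegImpl::slots_per_word): two words,
      // the saved return address and the saved frame pointer.
      assert(2 * slots_per_word == 4);                     // 4 slots
      assert(2 * slots_per_word * stack_slot_size == 16);  // 16 bytes preserved
      return 0;
    }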
++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 ++ sync_stack_slots(1 * VMRegImpl::slots_per_word); + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++ // Compiled code's Frame Pointer ++ frame_pointer(R2); + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(R8); + -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
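The return_addr() expression a few lines below rounds the preserved-plus-fixed slot count up to the stack alignment. For reference, this is the usual power-of-two align_up; a small sketch (the helper name and the example values are illustrative):

    #include <cassert>
    #include <cstdint>

    // Valid only when 'alignment' is a power of two, which stack alignments are.
    constexpr uint64_t align_up(uint64_t value, uint64_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      // e.g. 16-byte stack alignment with 4-byte slots -> alignment of 4 slots
      assert(align_up(5, 4) == 8);   // 5 preserved+fixed slots round up to 8
      assert(align_up(4, 4) == 4);   // already aligned counts are unchanged
      return 0;
    }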
++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+// Integer ALU reg operation -+pipeline %{ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} + -+ // List of nop instructions -+ nops( MachNop ); -+%} ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} + -+// We don't use an actual pipeline model so don't care about resources -+// or description. we do use pipeline classes to introduce fixed -+// latencies ++ // Location of compiled Java return values. Same as C for now. 
++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++%} + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
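For reference, the return_value body in the frame block above amounts to a table lookup from ideal register kind to a (lo, hi) register pair: integer and pointer results land in R10 (the RISC-V a0 register), floating-point results in F10 (fa0), with the hi half unused for 32-bit kinds. A hedged restatement in plain C++, where the enum and strings stand in for the real OptoReg encodings:

    #include <cassert>
    #include <string>
    #include <utility>

    enum IdealKind { RegN, RegI, RegP, RegF, RegD, RegL };  // stand-ins for Op_Reg*

    std::pair<std::string, std::string> return_regs(IdealKind kind) {
      switch (kind) {
        case RegN: case RegI: return {"R10", "Bad"};    // 32-bit integer/narrow: low half only
        case RegP: case RegL: return {"R10", "R10_H"};  // 64-bit integer/pointer
        case RegF:            return {"F10", "Bad"};    // single-precision float
        case RegD:            return {"F10", "F10_H"};  // double-precision float
      }
      return {"Bad", "Bad"};
    }

    int main() {
      assert(return_regs(RegI).first  == "R10");
      assert(return_regs(RegD).second == "F10_H");
      return 0;
    }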
+ -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++//----------Simple Operands---------------------------------------------------- ++ ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) ++// 32 bit zero ++operand immI0() +%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_uop_s(fRegF dst, fRegF src) ++// 32 bit unit increment ++operand immI_1() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit unit decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2f(fRegF dst, fRegD src) ++operand immI_le_4() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() <= 4); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2d(fRegD dst, fRegF src) ++operand immI_16() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) ++operand immI_24() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) ++operand immI_31() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) ++operand immI_63() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 63); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_l2f(fRegF dst, iRegL src) ++// 32 bit integer valid for add immediate ++operand immIAdd() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) ++// 32 bit integer valid for sub immediate ++operand immISub() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ 
predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) ++// 5 bit signed value. ++operand immI5() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) ++// 5 bit signed value (simm5) ++operand immL5() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) ++// 64 bit zero ++operand immL0() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_long() == 0); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) ++// Pointer operands ++// Pointer Immediate ++operand immP() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) ++// NULL Pointer Immediate ++operand immP0() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_ptr() == 1); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_constant_s(fRegF dst) ++// Polling Page Pointer Immediate ++operand immPollPage() +%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_constant_d(fRegD dst) ++// Card Table Byte Map Base ++operand immByteMapBase() +%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) ++// Int Immediate: low 16-bit mask ++operand immI_16bits() +%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_int() == 0xFFFF); ++ 
match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++operand immIpowerOf2() %{ ++ predicate(is_power_of_2((juint)(n->get_int()))); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_store_reg_s(fRegF src, memory mem) ++// Long Immediate: low 32-bit mask ++operand immL_32bits() +%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_store_reg_d(fRegD src, memory mem) ++// 64 bit unit decrement ++operand immL_M1() +%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_long() == -1); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+//------- Integer ALU operations -------------------------- + -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// 32 bit offset of pc in thread anchor ++ ++operand immL_pc_off() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++// 64 bit integer valid for add immediate ++operand immLAdd() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++// 64 bit integer valid for sub immediate ++operand immLSub() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg operation -+// E.g. NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() +%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++// Narrow NULL Pointer Immediate ++operand immN0() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU immediate operation (no source operands) -+// E.g. 
LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) ++operand immNKlass() +%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++ match(ConNKlass); + -+//------- Multiply pipeline operations -------------------- ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float and Double operands ++// Double Immediate ++operand immD() +%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Double Immediate: +0.0d ++operand immD0() +%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+//------- Divide pipeline operations -------------------- ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float Immediate ++operand immF() +%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float Immediate: +0.0f. ++operand immF0() +%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+//------- Load pipeline operations ------------------------ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) ++operand immIOffset() +%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) ++operand immLOffset() +%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+//------- Store pipeline operations ----------------------- -+ -+// Store - zr, mem -+// E.g. SD zr, mem -+pipe_class istore_mem(memory mem) ++// Scale values ++operand immIScale() +%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) ++// Integer 32 bit Register Operands ++operand iRegI() +%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Store - reg, reg -+// E.g. 
SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) ++// Integer 32 bit Register not Special ++operand iRegINoSp() +%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+//------- Store pipeline operations ----------------------- -+ -+// Branch -+pipe_class pipe_branch() ++// Register R10 only ++operand iRegI_R10() +%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Branch -+pipe_class pipe_branch_reg(iRegI src) ++// Register R12 only ++operand iRegI_R12() +%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++// Register R13 only ++operand iRegI_R13() +%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) ++// Register R14 only ++operand iRegI_R14() +%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() ++// Integer 64 bit Register Operands ++operand iRegL() +%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+pipe_class pipe_slow() ++// Integer 64 bit Register not Special ++operand iRegLNoSp() +%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Empty pipeline class -+pipe_class pipe_class_empty() ++// Long 64 bit Register R28 only ++operand iRegL_R28() +%{ -+ single_instruction; -+ fixed_latency(0); ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Default pipeline class. -+pipe_class pipe_class_default() ++// Long 64 bit Register R29 only ++operand iRegL_R29() +%{ -+ single_instruction; -+ fixed_latency(2); ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for compares. -+pipe_class pipe_class_compare() ++// Long 64 bit Register R30 only ++operand iRegL_R30() +%{ -+ single_instruction; -+ fixed_latency(16); ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for memory operations. 
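The fixed-register operand classes defined around here (iRegI_R10, iRegI_R12 through iRegI_R14, iRegL_R28 through iRegL_R30, and the iRegP_Rnn variants nearby) pin values to particular machine registers; their Rnn numbers follow the standard RISC-V ABI naming, with x10-x17 being the argument/result registers a0-a7 and x28-x31 the temporaries t3-t6. The lookup below is purely illustrative and not part of the port:

    #include <cassert>
    #include <string>

    std::string abi_name(int xreg) {
      if (xreg == 0)                return "zero";
      if (xreg == 1)                return "ra";    // return address
      if (xreg == 2)                return "sp";    // stack pointer
      if (xreg == 3)                return "gp";
      if (xreg == 4)                return "tp";
      if (xreg >= 5  && xreg <= 7)  return "t" + std::to_string(xreg - 5);
      if (xreg == 8)                return "s0/fp"; // frame pointer
      if (xreg == 9)                return "s1";
      if (xreg >= 10 && xreg <= 17) return "a" + std::to_string(xreg - 10);
      if (xreg >= 18 && xreg <= 27) return "s" + std::to_string(xreg - 16);
      if (xreg >= 28 && xreg <= 31) return "t" + std::to_string(xreg - 25);
      return "?";
    }

    int main() {
      assert(abi_name(10) == "a0");  // iRegI_R10 / iRegL_R10: first argument / result register
      assert(abi_name(14) == "a4");  // iRegI_R14, iRegP_R14
      assert(abi_name(28) == "t3");  // iRegL_R28, iRegP_R28: scratch temporaries
      return 0;
    }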
-+pipe_class pipe_class_memory() ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() +%{ -+ single_instruction; -+ fixed_latency(16); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for call. -+pipe_class pipe_class_call() ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() +%{ -+ single_instruction; -+ fixed_latency(100); ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Define the class for the Nop node. -+define %{ -+ MachNop = pipe_class_empty; -+%} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. 
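The iRegIHeapbase operand above, together with immN0 and the narrow-oop memory operands in this file (all predicated on narrow_oop_shift() == 0), relate to compressed-oop decoding: an oop is reconstructed as the heap base plus the 32-bit narrow value shifted left by the encoding shift, and when both the shift and the base are zero the narrow value is usable directly as an address. A hedged sketch of that decode, with made-up base and shift values:

    #include <cassert>
    #include <cstdint>

    // Illustrative only; HotSpot's actual decode also special-cases nulls
    // depending on the compressed-oops mode in use.
    uint64_t decode_narrow_oop(uint32_t narrow, uint64_t heap_base, unsigned shift) {
      if (narrow == 0) return 0;  // a narrow null stays null (see immN0)
      return heap_base + (static_cast<uint64_t>(narrow) << shift);
    }

    int main() {
      // Unscaled mode: zero base and zero shift, narrow value == address.
      assert(decode_narrow_oop(0x1000, 0, 0) == 0x1000);
      // Heap-based mode: a non-zero base is added during the decode.
      assert(decode_narrow_oop(0x1000, 0x800000000ULL, 0) == 0x800001000ULL);
      assert(decode_narrow_oop(0, 0x800000000ULL, 3) == 0);
      return 0;
    }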
-+ -+// ============================================================================ -+// Memory (Load/Store) Instructions -+ -+// Load Instructions + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() +%{ -+ match(Set dst (LoadB mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} -+ -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) ++operand iRegP_R12() +%{ -+ match(Set dst (ConvI2L (LoadB mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} -+ -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() +%{ -+ match(Set dst (LoadUB mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} -+ -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) ++operand iRegP_R14() +%{ -+ match(Set dst (ConvI2L (LoadUB mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} -+ -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) ++operand iRegP_R15() +%{ -+ match(Set dst (LoadS mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} -+ -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) ++operand iRegP_R16() +%{ -+ match(Set dst (ConvI2L (LoadS mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} -+ -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() +%{ -+ match(Set dst (LoadUS mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} -+ -+ ins_encode %{ -+ 
__ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() +%{ -+ match(Set dst (ConvI2L (LoadUS mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} -+ -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) ++// Integer 64 bit Register not Special ++operand iRegNNoSp() +%{ -+ match(Set dst (LoadI mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} -+ -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() +%{ -+ match(Set dst (ConvI2L (LoadI mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} -+ -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++// Long 64 bit Register R10 only ++operand iRegL_R10() +%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) ++// Float Register ++// Float register operands ++operand fRegF() +%{ -+ match(Set dst (LoadL mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} -+ -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); + -+ ins_pipe(iload_reg_mem); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) ++// Double Register ++// Double register operands ++operand fRegD() +%{ -+ match(Set dst (LoadRange mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); + -+ ins_pipe(iload_reg_mem); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) ++// Java 
Thread Register ++operand javaThread_RegP(iRegP reg) +%{ -+ match(Set dst (LoadP mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. ++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) ++operand indOffI(iRegP reg, immIOffset off) +%{ -+ match(Set dst (LoadN mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) ++operand indOffL(iRegP reg, immLOffset off) +%{ -+ match(Set dst (LoadKlass mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} -+ -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) ++operand indirectN(iRegN reg) +%{ -+ match(Set dst (LoadNKlass mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) ++operand indOffIN(iRegN reg, immIOffset off) +%{ -+ match(Set dst (LoadF mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} -+ -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(fp_load_mem_s); +%} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) ++operand indOffLN(iRegN reg, immLOffset off) +%{ -+ match(Set dst (LoadD mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "fld 
$dst, $mem\t# double, #@loadD" %} -+ -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(fp_load_mem_d); +%} + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} + -+ ins_encode(riscv_enc_li_imm(dst, src)); + -+ ins_pipe(ialu_imm); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) ++operand stackSlotF(sRegF reg) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} -+ -+ ins_encode(riscv_enc_li_imm(dst, src)); -+ -+ ins_pipe(ialu_imm); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) ++operand stackSlotD(sRegD reg) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} -+ -+ ins_encode(riscv_enc_mov_p(dst, con)); -+ -+ ins_pipe(ialu_imm); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) ++operand stackSlotL(sRegL reg) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++// Special operand allowing long args to int ops to be truncated for free + -+ ins_pipe(ialu_imm); -+%} ++operand iRegL2I(iRegL reg) %{ + -+// Load 
Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++ op_cost(0); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++ match(ConvL2I reg); + -+ ins_encode(riscv_enc_mov_p1(dst)); ++ format %{ "l2i($reg)" %} + -+ ins_pipe(ialu_imm); ++ interface(REG_INTER) +%} + -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); + -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. + -+ ins_pipe(ialu_imm); -+%} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() +%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} ++ match(Bool); + -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ format %{ "" %} + -+ ins_pipe(ialu_imm); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) ++// used for unsigned integral comparisons ++operand cmpOpU() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} -+ -+ ins_encode(riscv_enc_mov_n(dst, con)); ++ match(Bool); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); ++ %} +%} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} -+ -+ ins_encode(riscv_enc_mov_zero(dst)); ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ 
interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ++operand cmpOpULtGe() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} -+ -+ ins_encode(riscv_enc_mov_nk(dst, con)); ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); ++ %} +%} + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); -+ -+ ins_cost(LOAD_COST); -+ format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" -+ %} ++operand cmpOpUEqNeLeGt() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} -+ -+ ins_pipe(fp_load_constant_s); +%} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); + -+ ins_cost(XFER_COST); ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); -+ %} ++// Special Registers + -+ ins_pipe(fp_load_constant_s); ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ ins_cost(LOAD_COST); -+ format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" -+ %} ++// memory is used to define read/write location for load/store ++// instruction defs. 
we can turn a memory op into an Address + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); -+ %} ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ ins_pipe(fp_load_constant_d); -+%} ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ ins_cost(XFER_COST); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); -+ %} ++// Integer ALU reg operation ++pipeline %{ + -+ ins_pipe(fp_load_constant_d); ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); +%} + -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); ++// We don't use an actual pipeline model so don't care about resources ++// or description. 
we do use pipeline classes to introduce fixed ++// latencies + -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+ ins_pipe(istore_mem); -+%} ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. + -+ ins_pipe(istore_mem); ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ match(Set mem (StoreB mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} -+ -+ ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmB0(immI0 zero, memory mem) ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ match(Set mem (StoreB mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} -+ -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ match(Set mem (StoreC mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} -+ -+ ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmC0(immI0 zero, memory mem) ++pipe_class fp_d2f(fRegF dst, fRegD src) +%{ -+ match(Set mem (StoreC mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} -+ -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ 
dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ match(Set mem(StoreI mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmI0(immI0 zero, memory mem) ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ match(Set mem(StoreI mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} -+ -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ match(Set mem (StoreL mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} -+ -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ match(Set mem (StoreL mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} -+ -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Pointer -+instruct storeP(iRegP src, memory mem) ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ match(Set mem (StoreP mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} -+ -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ match(Set mem (StoreP mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} -+ -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) ++pipe_class fp_d2l(iRegLNoSp dst, fRegD src) +%{ -+ match(Set mem (StoreN mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# 
compressed ptr (rheapbase==0), #@storeImmN0" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_pipe(istore_reg_mem); ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Float -+instruct storeF(fRegF src, memory mem) ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set mem (StoreF mem src)); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_pipe(fp_store_reg_s); ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Double -+instruct storeD(fRegD src, memory mem) ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ match(Set mem (StoreD mem src)); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++pipe_class fp_load_constant_d(fRegD dst) ++%{ ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++pipe_class fp_load_mem_s(fRegF dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(fp_store_reg_d); ++pipe_class fp_load_mem_d(fRegD dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ match(Set mem (StoreNKlass mem src)); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++pipe_class fp_store_reg_d(fRegD src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//------- Integer ALU operations -------------------------- + -+ ins_pipe(istore_reg_mem); ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. 
ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) +%{ -+ match(Set dst (LoadPLocked mem)); ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_cost(ALU_COST * 2 + LOAD_COST); ++// Integer ALU reg immediate operation ++// E.g. ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) ++%{ ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++//------- Multiply pipeline operations -------------------- + -+ ins_pipe(pipe_serial); ++// Multiply reg-reg ++// E.g. MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV. -+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++// E.g. 
MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); -+ -+ ins_cost(ALU_COST * 2 + STORE_COST); -+ -+ format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" -+ %} ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); -+ %} ++//------- Divide pipeline operations -------------------- + -+ ins_pipe(pipe_serial); ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" -+ %} ++//------- Load pipeline operations ------------------------ + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) ++%{ ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_slow); ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. -+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) -+%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); -+ -+ format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++//------- Control transfer pipeline operations ------------ + -+ ins_pipe(pipe_slow); ++// Store - zr, mem ++// E.g. 
SD zr, mem ++pipe_class istore_mem(memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++// Store - reg, reg ++// E.g. SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++//------- Store pipeline operations ----------------------- + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" -+ %} ++// Branch ++pipe_class pipe_branch() ++%{ ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Branch ++pipe_class pipe_branch_reg(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_pipe(pipe_slow); ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() ++%{ ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++pipe_class pipe_slow() ++%{ ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapS" -+ %} ++// Empty pipeline class ++pipe_class pipe_class_empty() ++%{ ++ single_instruction; ++ fixed_latency(0); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Default pipeline class. ++pipe_class pipe_class_default() ++%{ ++ single_instruction; ++ fixed_latency(2); ++%} + -+ ins_pipe(pipe_slow); ++// Pipeline class for compares. ++pipe_class pipe_class_compare() ++%{ ++ single_instruction; ++ fixed_latency(16); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ single_instruction; ++ fixed_latency(16); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++// Pipeline class for call. ++pipe_class pipe_class_call() ++%{ ++ single_instruction; ++ fixed_latency(100); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" -+ %} ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; ++%} ++%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++// ============================================================================ ++// Memory (Load/Store) Instructions + -+ ins_pipe(pipe_slow); -+%} ++// Load Instructions + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set dst (LoadB mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadB mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set dst (LoadUB mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (ConvI2L (LoadUB mem))); + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadS mem)); + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadS mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set dst (LoadUS mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapLAcq" ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadUS mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set dst (LoadI mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. 
-+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (ConvI2L (LoadI mem))); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadL mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) +%{ -+ 
match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadRange mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadP mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Compressed Pointer ++instruct loadN(iRegNNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadN mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadKlass mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, 
Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadNKlass mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Float ++instruct loadF(fRegF dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadF mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_mem_s); +%} + -+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Double ++instruct loadD(fRegD dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadD mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); 
++ ins_pipe(fp_load_mem_d); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set dst src); + -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+ effect(TEMP_DEF res); ++ ins_pipe(ialu_imm); ++%} + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) ++%{ ++ match(Set dst src); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set dst con); + -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ins_encode(riscv_enc_mov_p(dst, con)); + -+ effect(TEMP_DEF res); ++ ins_pipe(ialu_imm); ++%} + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++ ins_encode(riscv_enc_mov_p1(dst)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapS(iRegINoSp 
res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ match(Set dst con); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ ins_pipe(ialu_imm); ++%} + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++%{ ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_n(dst, con)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_zero(dst)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set 
dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_nk(dst, con)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + ++ ins_cost(LOAD_COST); + format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapP" ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" + %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_s); +%} + -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(XFER_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_s); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); + ++ ins_cost(LOAD_COST); + format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" + %} + + ins_encode %{ 
-+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_d); +%} + -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(XFER_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_d); +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(istore_mem); +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set mem (StoreCM mem zero)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); 
++ ins_pipe(istore_mem); +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set mem (StoreB mem src)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++instruct storeimmB0(immI0 zero, memory mem) +%{ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreB mem zero)); + -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + + ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) +%{ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreC mem src)); + -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + + ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++instruct storeimmC0(immI0 zero, memory mem) +%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreC mem zero)); + -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + + ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) +%{ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem(StoreI mem src)); + -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setIAcq(indirect mem, iRegI newv, 
iRegINoSp prev) ++instruct storeimmI0(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem(StoreI mem zero)); + -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreL mem src)); + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreL mem zero)); + -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++// Store Pointer ++instruct storeP(iRegP src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreP mem src)); + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreP mem zero)); + -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) +%{ -+ 
predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreN mem src)); + -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); + -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++// Store Float ++instruct storeF(fRegF src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreF mem src)); + -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(fp_store_reg_s); +%} + -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++// Store Double ++instruct storeD(fRegD src, memory mem) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreD mem src)); + -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} ++ ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(fp_store_reg_d); +%} + -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreNKlass mem src)); + -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addIi(indirect 
mem, iRegINoSp newval, immIAdd incr) ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. ++ ++ ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ -+ match(Set newval (GetAndAddI mem incr)); ++ match(Set dst (LoadPLocked mem)); + -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 2 + LOAD_COST); + -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + + ins_pipe(pipe_serial); +%} + -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. ++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 2 + STORE_COST); + -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + + ins_pipe(pipe_serial); +%} + -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. 
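As background for the load-reserve/store-conditional pairing described in the comment above, here is an illustrative sketch (not part of this patch, and independent of the HotSpot MacroAssembler API) of how a strong 64-bit compare-and-swap is composed from the RV64A lr.d/sc.d instructions; the function name, operand constraints, and aq/rl placement below are assumptions made for the example only.

    #include <cstdint>

    // Illustrative only: strong CAS built from an lr.d/sc.d pair on RV64A.
    // lr.d.aq takes the reservation with acquire semantics; sc.d.rl writes
    // zero to its result register on success and non-zero if the reservation
    // was lost, in which case the loop retries.
    static inline bool cas_u64(volatile uint64_t* addr,
                               uint64_t expected,
                               uint64_t desired) {
      uint64_t observed;
      uint64_t fail;
      __asm__ volatile(
        "1: lr.d.aq  %[obs], (%[ptr])         \n"   // load-reserved, acquire
        "   bne      %[obs], %[exp], 2f       \n"   // value differs: give up
        "   sc.d.rl  %[fail], %[des], (%[ptr])\n"   // store-conditional, release
        "   bnez     %[fail], 1b              \n"   // reservation lost: retry
        "2:"
        : [obs] "=&r"(observed), [fail] "=&r"(fail)
        : [ptr] "r"(addr), [exp] "r"(expected), [des] "r"(desired)
        : "memory");
      return observed == expected;
    }

A weak CAS follows the same shape but may simply report failure when the store-conditional fails instead of looping, which is the behavioural difference the weak* rules later in this file rely on.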
++instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. ++instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Arithmetic Instructions -+// ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+// Integer Addition ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+// TODO -+// these currently employ operations which do not set CR and hence are -+// not flagged as killing CR but we would like to isolate the cases -+// where we want to set flags from those where we don't. need to work -+// out how to do that. -+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (AddI src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ -+ match(Set dst (AddI src1 src2)); -+ -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ int32_t con = (int32_t)$src2$$constant; -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); -+ %} ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ -+ match(Set dst (AddI (ConvL2I src1) src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Pointer Addition -+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ -+ match(Set dst (AddP src1 src2)); ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" + %} + -+ ins_pipe(ialu_reg_reg); -+%} ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + -+// If we shift more than 32 bits, we need not convert I2L. 
-+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ -+ match(Set dst (LShiftL (ConvI2L src) scale)); -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode %{ -+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); -+ %} ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_reg_shift); -+%} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+// Pointer Immediate Addition -+// n.b. this needs to be more expensive than using an indirect memory -+// operand -+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ -+ match(Set dst (AddP src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+// Long Addition -+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_reg_reg); -+%} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+// No constant pool entries requiredLong Immediate Addition. -+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Integer Subtraction -+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (SubI src1 src2)); ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Immediate Subtraction -+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} + + ins_encode %{ -+ // src2 is imm, so actually call the addiw -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Long Subtraction -+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+// No constant pool entries requiredLong Immediate Subtraction. 
-+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Integer Negation (special case for sub) ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ -+ match(Set dst (SubI zero src)); -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" ++ %} + + ins_encode %{ -+ // actually call the subw -+ __ negw(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Negation ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ -+ match(Set dst (SubL zero src)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + + ins_encode %{ -+ // actually call the sub -+ __ neg(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// Integer Multiply ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (MulI src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + -+ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ // riscv64 mulw will sign-extension to high 32 bits in dst reg -+ __ mulw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ 
as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(imul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Multiply ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (MulL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mul $dst, $src1, $src2\t#@mulL" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + + ins_encode %{ -+ __ mul(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (MulHiL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + + ins_encode %{ -+ __ mulh(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Integer Divide ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (DivI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_divw(dst, src1, src2)); -+ ins_pipe(idiv_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ -+ match(Set dst (URShiftI (RShiftI src1 div1) div2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, 
#@compareAndExchangeSAcq" ++ %} + + ins_encode %{ -+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} -+ ins_pipe(ialu_reg_shift); ++ ++ ins_pipe(pipe_slow); +%} + -+// Long Divide ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (DivL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "div $dst, $src1, $src2\t#@divL" %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_div(dst, src1, src2)); -+ ins_pipe(ldiv_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ -+ match(Set dst (URShiftL (RShiftL src1 div1) div2)); -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ++ effect(TEMP_DEF res); + -+ ins_encode %{ -+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" + %} -+ ins_pipe(ialu_reg_shift); -+%} -+ -+// Integer Remainder + -+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (ModI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "remw $dst, $src1, $src2\t#@modI" %} ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} + -+ ins_encode(riscv_enc_modw(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Remainder ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (ModL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_mod(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Integer Shifts ++ effect(TEMP_DEF res); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + + ins_encode %{ -+ __ sllw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Left Immediate -+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst 
(LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 5 bits of the I-immediate field for RV32I -+ __ slliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); -+ %} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_shift); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} ++ effect(TEMP_DEF res); + -+ ins_encode %{ -+ __ srlw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" + %} + -+ ins_pipe(ialu_reg_reg_vshift); -+%} -+ -+// Shift Right Logical Immediate -+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} -+ + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_reg_vshift); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Shift Right Arithmetic Immediate -+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" ++ %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, 
$res$$Register); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Long Shifts ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + -+ ins_cost(ALU_COST); -+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" ++ %} + + ins_encode %{ -+ __ sll(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Left Immediate -+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ slli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ 
"xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" ++ %} + + ins_encode %{ -+ __ srl(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Logical Immediate -+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// A special-case pattern for card table stores. -+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ -+ match(Set dst (URShiftL (CastP2X src1) src2)); ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 
1\t# $res == 1 when success, #@weakCompareAndSwapP" ++ %} + + ins_encode %{ -+ __ sra(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srai(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + -+ ins_pipe(ialu_reg_shift); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ -+ match(Set dst (XorI src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ -+ match(Set dst (XorL src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + ++instruct weakCompareAndSwapIAcq(iRegINoSp 
res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+// ============================================================================ -+// Floating Point Arithmetic Instructions ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (AddF src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ %} + + ins_encode %{ -+ __ fadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (AddD src1 src2)); ++instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ %} + + ins_encode %{ -+ __ fadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_slow); +%} + -+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (SubF src1 src2)); ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ %} + + ins_encode %{ -+ __ fsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (SubD src1 src2)); ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP 
newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ %} + + ins_encode %{ -+ __ fsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_slow); +%} + -+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MulF src1 src2)); ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetI mem newv)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ fmul_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MulD src1 src2)); ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ match(Set prev (GetAndSetL mem newv)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ fmul_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 + src3 -+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary src1 src2))); ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetN mem newv)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ fmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 + src3 -+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary src1 src2))); ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ match(Set prev (GetAndSetP mem newv)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ 
__ fmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 - src3 -+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ++ match(Set prev (GetAndSetI mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ fmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 - src3 -+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ fmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); -+ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ++ match(Set prev (GetAndSetN mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ fnmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); -+ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + 
ins_encode %{ -+ __ fnmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); -+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ fnmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); -+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ fnmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// Math.max(FF)F -+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MaxF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" -+ "fmax.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+// Math.min(FF)F -+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MinF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ 
"fsflags zr\t#@minF_reg_reg\n\t" -+ "fmin.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+// Math.max(DD)D -+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MaxD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" -+ "fmax.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+// Math.min(DD)D -+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MinD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minD_reg_reg\n\t" -+ "fmin.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (DivF src1 src2)); ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ fdiv_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_div_s); ++ ins_pipe(pipe_serial); +%} + -+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (DivD src1 src2)); ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} ++ match(Set dummy 
(GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + + ins_encode %{ -+ __ fdiv_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_div_d); ++ ins_pipe(pipe_serial); +%} + -+instruct negF_reg_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (NegF src)); ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ __ fneg_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_s); ++ ins_pipe(pipe_serial); +%} + -+instruct negD_reg_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (NegD src)); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ __ fneg_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_serial); +%} + -+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (AbsI src)); ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI_reg" %} ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2L (AbsI src))); ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI2L_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absL_reg(iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (AbsL 
src)); ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "srai t0, $src, 0x3f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "sub $dst, $dst, t0\t#absL_reg" %} ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ srai(t0, as_Register($src$$reg), 0x3f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (AbsF src)); ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} -+ ins_encode %{ -+ __ fabs_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_pipe(fp_uop_s); -+%} ++ ins_cost(ALU_COST); + -+instruct absD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (AbsD src)); ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ fabs_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_serial); +%} + -+instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (SqrtF src)); ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} + ins_encode %{ -+ __ fsqrt_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_sqrt_s); ++ ins_pipe(pipe_serial); +%} + -+instruct sqrtD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (SqrtD src)); ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ fsqrt_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_sqrt_d); ++ ins_pipe(pipe_serial); +%} + -+// Arithmetic Instructions End -+ +// ============================================================================ -+// Logical Instructions ++// Arithmetic Instructions ++// + -+// Register And -+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (AndI src1 src2)); ++// Integer Addition + -+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++// TODO ++// these currently employ operations which do 
not set CR and hence are ++// not flagged as killing CR but we would like to isolate the cases ++// where we want to set flags from those where we don't. need to work ++// out how to do that. ++instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ + ins_encode %{ -+ __ andr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35186,31 +33783,49 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate And -+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (AndI src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ++instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ ++ match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++ + ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ int32_t con = (int32_t)$src2$$constant; ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Or -+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ ++ match(Set dst (AddI (ConvL2I src1) src2)); + -+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Pointer Addition ++instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ++ match(Set dst (AddP src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ + ins_encode %{ -+ __ orr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35218,63 +33833,82 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Or -+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (OrI src1 src2)); -+ -+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} -+ ++// If we shift more than 32 bits, we need not convert I2L. ++instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ ++ match(Set dst (LShiftL (ConvI2L src) scale)); + ins_cost(ALU_COST); ++ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++ + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(ialu_reg_shift); +%} + -+// Register Xor -+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (XorI src1 src2)); ++// Pointer Immediate Addition ++// n.b. 
this needs to be more expensive than using an indirect memory ++// operand ++instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ++ ins_pipe(ialu_reg_imm); ++%} + ++// Long Addition ++instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++ + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Xor -+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (XorI src1 src2)); -+ -+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} -+ ++// No constant pool entries requiredLong Immediate Addition. ++instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++ + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register And Long -+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} ++// Integer Subtraction ++instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ + ins_encode %{ -+ __ andr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35282,31 +33916,33 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate And Long -+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} ++// Immediate Subtraction ++instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ++ match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ + ins_encode %{ -+ __ andi(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addiw ++ __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Or Long -+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (OrL src1 src2)); -+ -+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} -+ ++// Long Subtraction ++instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "sub $dst, 
$src1, $src2\t#@subL_reg_reg" %} ++ + ins_encode %{ -+ __ orr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35314,5301 +33950,4461 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Or Long -+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (OrL src1 src2)); -+ -+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} -+ ++// No constant pool entries requiredLong Immediate Subtraction. ++instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ++ match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++ + ins_encode %{ -+ __ ori(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Xor Long -+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (XorL src1 src2)); -+ -+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} ++// Integer Negation (special case for sub) + ++instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ ++ match(Set dst (SubI zero src)); + ins_cost(ALU_COST); ++ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ // actually call the subw ++ __ negw(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+// Immediate Xor Long -+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (XorL src1 src2)); ++// Long Negation + ++instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ ++ match(Set dst (SubL zero src)); + ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} ++ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ // actually call the sub ++ __ neg(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// BSWAP Instructions -+ -+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesI src)); -+ effect(TEMP cr); ++// Integer Multiply + -+ ins_cost(ALU_COST * 13); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ++instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} + ++ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ // riscv64 mulw will sign-extension to high 32 bits in dst reg ++ __ mulw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(imul_reg_reg); +%} + -+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (ReverseBytesL src)); -+ effect(TEMP cr); ++// Long Multiply + -+ ins_cost(ALU_COST * 29); -+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ++instruct mulL(iRegLNoSp dst, iRegL src1, 
iRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mul $dst, $src1, $src2\t#@mulL" %} + + ins_encode %{ -+ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ mul(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesUS src)); -+ -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ++instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++%{ ++ match(Set dst (MulHiL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ mulh(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesS src)); ++// Integer Divide + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ++instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ ++ ins_encode(riscv_enc_divw(dst, src1, src2)); ++ ins_pipe(idiv_reg_reg); ++%} ++ ++instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ ++ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} -+ -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+// ============================================================================ -+// MemBar Instruction ++// Long Divide + -+instruct load_fence() %{ -+ match(LoadFence); -+ ins_cost(ALU_COST); ++instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "div $dst, $src1, $src2\t#@divL" %} + -+ format %{ "#@load_fence" %} ++ ins_encode(riscv_enc_div(dst, src1, src2)); ++ ins_pipe(ldiv_reg_reg); ++%} ++ ++instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ ++ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct membar_acquire() %{ -+ match(MemBarAcquire); -+ ins_cost(ALU_COST); ++// Integer Remainder + -+ format %{ "#@membar_acquire\n\t" -+ "fence ir iorw" %} ++instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "remw $dst, $src1, $src2\t#@modI" %} + -+ ins_encode %{ -+ __ block_comment("membar_acquire"); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ %} ++ ins_encode(riscv_enc_modw(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_pipe(pipe_serial); ++// Long Remainder ++ ++instruct modL(iRegLNoSp 
dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ ++ ins_encode(riscv_enc_mod(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct membar_acquire_lock() %{ -+ match(MemBarAcquireLock); -+ ins_cost(0); ++// Integer Shifts + -+ format %{ "#@membar_acquire_lock (elided)" %} ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_acquire_lock (elided)"); ++ __ sllw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct store_fence() %{ -+ match(StoreFence); ++// Shift Left Immediate ++instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@store_fence" %} ++ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ // the shift amount is encoded in the lower ++ // 5 bits of the I-immediate field for RV32I ++ __ slliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct membar_release() %{ -+ match(MemBarRelease); ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@membar_release\n\t" -+ "fence iorw ow" %} ++ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_release"); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ srlw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct membar_storestore() %{ -+ match(MemBarStoreStore); ++// Shift Right Logical Immediate ++instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "MEMBAR-store-store\t#@membar_storestore" %} ++ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} + + ins_encode %{ -+ __ membar(MacroAssembler::StoreStore); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} -+ ins_pipe(pipe_serial); -+%} + -+instruct membar_release_lock() %{ -+ match(MemBarReleaseLock); -+ ins_cost(0); ++ ins_pipe(ialu_reg_shift); ++%} + -+ format %{ "#@membar_release_lock (elided)" %} ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_release_lock (elided)"); ++ // riscv will 
sign-ext dst high 32 bits ++ __ sraw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct membar_volatile() %{ -+ match(MemBarVolatile); ++// Shift Right Arithmetic Immediate ++instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (RShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@membar_volatile\n\t" -+ "fence iorw iorw"%} ++ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} + + ins_encode %{ -+ __ block_comment("membar_volatile"); -+ __ membar(MacroAssembler::StoreLoad); ++ // riscv will sign-ext dst high 32 bits ++ __ sraiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_shift); +%} + -+// ============================================================================ -+// Cast Instructions (Java-level type cast) ++// Long Shifts + -+instruct castX2P(iRegPNoSp dst, iRegL src) %{ -+ match(Set dst (CastX2P src)); ++// Shift Left Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ++ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ sll(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct castP2X(iRegLNoSp dst, iRegP src) %{ -+ match(Set dst (CastP2X src)); ++// Shift Left Immediate ++instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ++ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ slli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct castPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CastPP dst)); -+ ins_cost(0); ++// Shift Right Logical Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ size(0); -+ format %{ "# castPP of $dst, #@castPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ ins_cost(ALU_COST); ++ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} + -+instruct castII(iRegI dst) -+%{ -+ match(Set dst (CastII dst)); ++ ins_encode %{ ++ __ srl(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ size(0); -+ format %{ "# castII of $dst, #@castII" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct checkCastPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CheckCastPP dst)); ++// Shift Right Logical Immediate ++instruct 
urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ size(0); -+ ins_cost(0); -+ format %{ "# checkcastPP of $dst, #@checkCastPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} + -+// ============================================================================ -+// Convert Instructions ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} + -+// int to bool -+instruct convI2Bool(iRegINoSp dst, iRegI src) -+%{ -+ match(Set dst (Conv2B src)); ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// A special-case pattern for card table stores. ++instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ ++ match(Set dst (URShiftL (CastP2X src1) src2)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convI2Bool" %} ++ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+// pointer to bool -+instruct convP2Bool(iRegINoSp dst, iRegP src) -+%{ -+ match(Set dst (Conv2B src)); ++// Shift Right Arithmetic Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convP2Bool" %} ++ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ sra(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+// int <-> long -+ -+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) -+%{ -+ match(Set dst (ConvI2L src)); ++// Shift Right Arithmetic Immediate ++instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (RShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ++ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srai(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} -+ ins_pipe(ialu_reg); -+%} + -+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ -+ match(Set dst (ConvL2I src)); ++ ins_pipe(ialu_reg_shift); ++%} + ++instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ ++ match(Set dst (XorI src1 m1)); + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} ++ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + + 
ins_pipe(ialu_reg); +%} + -+// int to unsigned long (Zero-extend) -+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) -+%{ -+ match(Set dst (AndL (ConvI2L src) mask)); -+ -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ++instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ ++ match(Set dst (XorL src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} + + ins_encode %{ -+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg); +%} + -+// float <-> double + -+instruct convF2D_reg(fRegD dst, fRegF src) %{ -+ match(Set dst (ConvF2D src)); ++// ============================================================================ ++// Floating Point Arithmetic Instructions + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} ++instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ fadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2d); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convD2F_reg(fRegF dst, fRegD src) %{ -+ match(Set dst (ConvD2F src)); ++instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (AddD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ fadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_d2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// float <-> int -+ -+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2I src)); ++instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (SubF src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ __ fsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2i); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2F src)); ++instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (SubD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_i2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// float <-> long -+ -+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2L src)); ++instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MulF src1 src2)); + -+ 
ins_cost(XFER_COST); -+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); ++ __ fmul_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2l); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ -+ match(Set dst (ConvL2F src)); ++instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MulD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmul_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_l2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// double <-> int -+ -+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2I src)); ++// src1 * src2 + src3 ++instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); ++ __ fmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_d2i); ++ ins_pipe(pipe_class_default); +%} + -+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2D src)); ++// src1 * src2 + src3 ++instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_i2d); ++ ins_pipe(pipe_class_default); +%} + -+// double <-> long -+ -+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2L src)); ++// src1 * src2 - src3 ++instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); ++ __ fmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_d2l); ++ ins_pipe(pipe_class_default); +%} + -+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ -+ match(Set dst (ConvL2D src)); ++// src1 * src2 - src3 ++instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); + 
-+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(pipe_class_default); +%} + -+// Convert oop into int for vectors alignment masking -+instruct convP2I(iRegINoSp dst, iRegP src) %{ -+ match(Set dst (ConvL2I (CastP2X src))); ++// -src1 * src2 + src3 ++instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} + + ins_encode %{ -+ __ zero_extend($dst$$Register, $src$$Register, 32); ++ __ fnmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_default); +%} + -+// Convert compressed oop into int for vectors alignment masking -+// in case of 32bit oops (heap < 4Gb). -+instruct convN2I(iRegINoSp dst, iRegN src) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++// -src1 * src2 + src3 ++instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} + + ins_encode %{ -+ __ mv($dst$$Register, $src$$Register); ++ __ fnmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_default); +%} + -+// Convert oop pointer into compressed form -+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodeP src)); -+ ins_cost(ALU_COST); -+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ encode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++// -src1 * src2 - src3 ++instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + -+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && -+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop(d, s); ++ __ fnmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ 
as_FloatRegister($src3$$reg)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || -+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++// -src1 * src2 - src3 ++instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop_not_null(d, s); ++ __ fnmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_class_default); +%} + -+// Convert klass pointer into compressed form. -+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodePKlass src)); ++// Math.max(FF)F ++instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MaxF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} ++ format %{ "maxF $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ encode_klass_not_null(dst_reg, src_reg, t0); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(!maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); ++// Math.min(FF)F ++instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MinF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ format %{ "minF $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? 
xheapbase : t0); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ -+ predicate(maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); -+ -+ effect(TEMP tmp); ++// Math.max(DD)D ++instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MaxD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ format %{ "maxD $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ Register tmp_reg = as_Register($tmp$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// stack <-> reg and reg <-> reg shuffles with no conversion ++// Math.min(DD)D ++instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MinD src1 src2)); ++ effect(TEMP_DEF dst); + -+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ ++ format %{ "minD $dst, $src1, $src2" %} + -+ match(Set dst (MoveF2I src)); ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(fp_dop_reg_reg_d); ++%} + -+ ins_cost(LOAD_COST); ++instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (DivF src1 src2)); + -+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} + + ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ fdiv_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(fp_div_s); +%} + -+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(LOAD_COST); ++instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (DivD src1 src2)); + -+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} + + ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ fdiv_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(fp_div_d); +%} + -+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ ++instruct negF_reg_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (NegF src)); + -+ match(Set dst (MoveD2L src)); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ __ fneg_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(LOAD_COST); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ++instruct negD_reg_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (NegD src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.d $dst, 
$src, $src\t#@negD_reg_reg" %} + + ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ fneg_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(fp_uop_d); +%} + -+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ ++instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (AbsI src)); + -+ match(Set dst (MoveL2D src)); ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "sraiw t0, $src, 0x1f\n\t" ++ "addw $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absI_reg" ++ %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} + -+ ins_cost(LOAD_COST); ++ ins_pipe(ialu_reg_reg); ++%} + -+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ++instruct absL_reg(iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (AbsL src)); + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "srai t0, $src, 0x3f\n\t" ++ "add $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absL_reg" + %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ srai(t0, as_Register($src$$reg), 0x3f); ++ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} + ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct absF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (AbsF src)); + -+ effect(DEF dst, USE src); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} ++ ins_encode %{ ++ __ fabs_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(STORE_COST); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} ++instruct absD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (AbsD src)); + ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ __ fabs_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(fp_uop_d); +%} + -+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); ++instruct sqrtF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (SqrtF src)); + -+ effect(DEF dst, USE src); ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} ++ ins_encode %{ ++ __ fsqrt_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(STORE_COST); ++ ins_pipe(fp_sqrt_s); ++%} + -+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ++instruct sqrtD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (SqrtD src)); + ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ __ fsqrt_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(fp_sqrt_d); +%} + -+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++// Arithmetic Instructions End + -+ effect(DEF dst, USE src); ++// ============================================================================ ++// Logical Instructions + -+ ins_cost(STORE_COST); ++// Register And ++instruct 
andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++// Immediate And ++instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ++ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++// Register Or ++instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ match(Set dst (MoveF2I src)); ++ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_cost(XFER_COST); ++// Immediate Or ++instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(fp_f2i); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Register Xor ++instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_i2f); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Immediate Xor ++instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); 
+ %} + -+ ins_pipe(fp_d2l); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Register And Long ++instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(ialu_reg_reg); +%} + -+// ============================================================================ -+// Compare Instructions which set the result float comparisons in dest register. -+ -+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) -+%{ -+ match(Set dst (CmpF3 op1 op2)); ++// Immediate And Long ++instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.s $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" -+ %} ++ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. -+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) -+%{ -+ match(Set dst (CmpD3 op1 op2)); ++// Register Or Long ++instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.d $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" -+ %} ++ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
-+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) -+%{ -+ match(Set dst (CmpL3 op1 op2)); ++// Immediate Or Long ++instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ ins_cost(ALU_COST * 3 + BRANCH_COST); -+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" -+ "bnez $dst, done\n\t" -+ "slt $dst, $op1, $op2\n\t" -+ "neg $dst, $dst\t#@cmpL3_reg_reg" -+ %} ++ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); -+ __ mv(as_Register($dst$$reg), t0); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) -+%{ -+ match(Set dst (CmpLTMask p q)); -+ -+ ins_cost(2 * ALU_COST); ++// Register Xor Long ++instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" -+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" -+ %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); -+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) -+%{ -+ match(Set dst (CmpLTMask op zero)); ++// Immediate Xor Long ++instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (XorL src1 src2)); + + ins_cost(ALU_COST); -+ -+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} + + ins_encode %{ -+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg_imm); +%} + -+ +// ============================================================================ -+// Max and Min -+ -+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MinI src1 src2)); ++// BSWAP Instructions + -+ effect(DEF dst, USE src1, USE src2); ++instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesI src)); ++ effect(TEMP cr); + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@minI_rReg" -+ %} ++ ins_cost(ALU_COST * 13); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); ++ 
__ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MaxI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); ++instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesL src)); ++ effect(TEMP cr); + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@maxI_rReg" -+ %} ++ ins_cost(ALU_COST * 29); ++ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ ++ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// Branch Instructions -+// Direct Branch. -+instruct branch(label lbl) -+%{ -+ match(Goto); -+ -+ effect(USE lbl); ++instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "j $lbl\t#@branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} + -+ ins_encode(riscv_enc_j(lbl)); ++ ins_encode %{ ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// Compare and Branch Instructions -+ -+// Patterns for short (< 12KiB) variants -+ -+// Compare flags and branch near instructions. -+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); -+%} + -+// Compare signed int and branch near instructions -+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_branch'. -+ match(If cmp (CmpI op1 op2)); ++ ins_pipe(ialu_reg); ++%} + -+ effect(USE lbl); ++// ============================================================================ ++// MemBar Instruction + -+ ins_cost(BRANCH_COST); ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} ++ format %{ "#@load_fence" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_loop'. 
-+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} ++ format %{ "#@membar_acquire\n\t" ++ "fence ir iorw" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_acquire"); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare unsigned int and branch near instructions -+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_branch'. -+ match(If cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} ++ format %{ "#@membar_acquire_lock (elided)" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_acquire_lock (elided)"); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} ++ format %{ "#@store_fence" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare signed long and branch near instructions -+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_branch'. -+ match(If cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} ++ format %{ "#@membar_release\n\t" ++ "fence iorw ow" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_release"); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} ++ format %{ "MEMBAR-store-store\t#@membar_storestore" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare unsigned long and branch near instructions -+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_branch'. -+ match(If cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} ++ format %{ "#@membar_release_lock (elided)" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_release_lock (elided)"); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} ++ format %{ "#@membar_volatile\n\t" ++ "fence iorw iorw"%} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_volatile"); ++ __ membar(MacroAssembler::StoreLoad); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare pointer and branch near instructions -+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_branch'. -+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); ++// ============================================================================ ++// Cast Instructions (Java-level type cast) + -+ ins_cost(BRANCH_COST); ++instruct castX2P(iRegPNoSp dst, iRegL src) %{ ++ match(Set dst (CastX2P src)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_loop'. 
-+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct castP2X(iRegLNoSp dst, iRegP src) %{ ++ match(Set dst (CastP2X src)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare narrow pointer and branch near instructions -+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++instruct castPP(iRegPNoSp dst) +%{ -+ // Same match rule as `far_cmpN_branch'. -+ match(If cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++ match(Set dst (CastPP dst)); ++ ins_cost(0); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} ++ size(0); ++ format %{ "# castPP of $dst, #@castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); -+ %} ++instruct castII(iRegI dst) ++%{ ++ match(Set dst (CastII dst)); + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ size(0); ++ format %{ "# castII of $dst, #@castII" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + -+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++instruct checkCastPP(iRegPNoSp dst) +%{ -+ // Same match rule as `far_cmpN_loop'. -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ match(Set dst (CheckCastPP dst)); + -+ effect(USE lbl); ++ size(0); ++ ins_cost(0); ++ format %{ "# checkcastPP of $dst, #@checkCastPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST); ++// ============================================================================ ++// Convert Instructions + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} ++// int to bool ++instruct convI2Bool(iRegINoSp dst, iRegI src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convI2Bool" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare float and branch near instructions -+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++// pointer to bool ++instruct convP2Bool(iRegINoSp dst, iRegP src) +%{ -+ // Same match rule as `far_cmpF_branch'. 
-+ match(If cmp (CmpF op1 op2)); -+ -+ effect(USE lbl); ++ match(Set dst (Conv2B src)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convP2Bool" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_loop'. -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// int <-> long + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) ++%{ ++ match(Set dst (ConvI2L src)); + ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); + %} -+ -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare double and branch near instructions -+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_branch'. -+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ++ match(Set dst (ConvL2I src)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++// int to unsigned long (Zero-extend) ++instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +%{ -+ // Same match rule as `far_cmpD_loop'. -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed int with zero and branch near instructions -+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_branch'. 
-+ match(If cmp (CmpI op1 zero)); ++// float <-> double + -+ effect(USE op1, USE lbl); ++instruct convF2D_reg(fRegD dst, fRegF src) %{ ++ match(Set dst (ConvF2D src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2d); +%} + -+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convD2F_reg(fRegF dst, fRegD src) %{ ++ match(Set dst (ConvD2F src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2f); +%} + -+// Compare unsigned int with zero and branch near instructions -+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// float <-> int + -+ ins_cost(BRANCH_COST); ++instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2I src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2i); +%} + -+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} ++instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2F src)); + ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_i2f); +%} + -+// Compare signed long with zero and branch near instructions -+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_branch'. 
-+ match(If cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// float <-> long + -+ ins_cost(BRANCH_COST); ++instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2L src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2l); +%} + -+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ++ match(Set dst (ConvL2F src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_l2f); +%} + -+// Compare unsigned long with zero and branch near instructions -+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// double <-> int + -+ ins_cost(BRANCH_COST); ++instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2I src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2i); +%} + -+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2D src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_i2d); +%} + -+// Compare pointer with zero and branch near instructions -+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_branch'. 
-+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// double <-> long + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} ++instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2l); +%} + -+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ ++ match(Set dst (ConvL2D src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_l2d); +%} + -+// Compare narrow pointer with zero and branch near instructions -+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_branch'. -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Convert oop into int for vectors alignment masking ++instruct convP2I(iRegINoSp dst, iRegP src) %{ ++ match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend($dst$$Register, $src$$Register, 32); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(iRegINoSp dst, iRegN src) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ mv($dst$$Register, $src$$Register); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare narrow pointer with pointer zero and branch near instructions -+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. 
-+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodeP src)); ++ ins_cost(ALU_COST); ++ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ encode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} ++instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop(d, s); + %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} ++instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop_not_null(d, s); + %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Patterns for far (20KiB) variants -+ -+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++// Convert klass pointer into compressed form. 
++instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodePKlass src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ encode_klass_not_null(dst_reg, src_reg, t0); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+// Compare signed int and branch far instructions -+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpI op1 op2)); -+ effect(USE lbl); ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ ++ match(Set dst (DecodeNKlass src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(TEMP tmp); + -+ // the format instruction [far_b$cmp] here is be used as two insructions -+ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ Register tmp_reg = as_Register($tmp$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ effect(USE lbl); ++// stack <-> reg and reg <-> reg shuffles with no conversion + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); -+ %} ++ match(Set dst (MoveF2I src)); + -+ ins_pipe(pipe_cmp_branch); -+%} ++ effect(DEF dst, USE src); + -+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpU op1 op2)); -+ effect(USE lbl); ++ ins_cost(LOAD_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ effect(USE lbl); ++instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ 
flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(istore_reg_reg); ++ +%} + -+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(If cmp (CmpP op1 op2)); ++instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveD2L src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ 
ins_cost(STORE_COST); ++ ++ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpP op1 op2)); ++instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveL2D src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ ins_cost(STORE_COST); ++ ++ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(istore_reg_reg); ++ +%} + -+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(If cmp (CmpN op1 op2)); ++instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveF2I src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_f2i); ++ +%} + -+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveI2F src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_i2f); ++ +%} + -+// Float compare and branch instructions -+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(If cmp (CmpF op1 op2)); ++instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveD2L src)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(fp_d2l); ++ +%} + -+instruct far_cmpF_loop(cmpOp cmp, fRegF 
op1, fRegF op2, label lbl)
-+%{
-+ match(CountedLoopEnd cmp (CmpF op1 op2));
-+ effect(USE lbl);
++instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}
++ match(Set dst (MoveL2D src));
++
++ effect(DEF dst, USE src);
++
++ ins_cost(XFER_COST);
++
++ format %{ "fmv.d.x $dst, $src\t#@MoveL2D_reg_reg" %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
-+ *($lbl$$label), /* is_far */ true);
++ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
-+ ins_pipe(pipe_class_compare);
++ ins_pipe(fp_l2d);
+%}
+
-+// Double compare and branch instructions
-+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
++// ============================================================================
++// Compare Instructions which set the result of float comparisons in dest register.
++
++instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
+%{
-+ match(If cmp (CmpD op1 op2));
-+ effect(USE lbl);
++ match(Set dst (CmpF3 op1 op2));
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}
++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
++ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
++ "bgtz $dst, done\n\t"
++ "feq.s $dst, $op1, $op2\n\t"
++ "addi $dst, $dst, -1\t#@cmpF3_reg_reg"
++ %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
-+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
++ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
++ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+ %}
+
-+ ins_pipe(pipe_class_compare);
++ ins_pipe(pipe_class_default);
+%}
+
-+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
++instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
+%{
-+ match(CountedLoopEnd cmp (CmpD op1 op2));
-+ effect(USE lbl);
++ match(Set dst (CmpD3 op1 op2));
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}
++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
++ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
++ "bgtz $dst, done\n\t"
++ "feq.d $dst, $op1, $op2\n\t"
++ "addi $dst, $dst, -1\t#@cmpD3_reg_reg"
++ %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
-+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
++ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(pipe_class_default); +%} + -+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) +%{ -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ match(Set dst (CmpL3 op1 op2)); + ++ ins_cost(ALU_COST * 3 + BRANCH_COST); ++ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" ++ "bnez $dst, done\n\t" ++ "slt $dst, $op1, $op2\n\t" ++ "neg $dst, $dst\t#@cmpL3_reg_reg" ++ %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); ++ __ mv(as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_class_default); +%} + -+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) +%{ -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); ++ match(Set dst (CmpLTMask p q)); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(2 * ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} ++ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" ++ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); ++ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) +%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); ++ match(Set dst (CmpLTMask op zero)); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// Max and Min + -+ ins_cost(BRANCH_COST * 2); ++instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MinI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++ effect(DEF dst, USE src1, USE src2); + ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@minI_rReg" ++ %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Label Lsrc1, Ldone; ++ __ ble(as_Register($src1$$reg), 
as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) +%{ -+ match(If cmp (CmpU op1 zero)); ++ match(Set dst (MaxI src1 src2)); + -+ effect(USE op1, USE lbl); ++ effect(DEF dst, USE src1, USE src2); + -+ ins_cost(BRANCH_COST); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@maxI_rReg" ++ %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++// ============================================================================ ++// Branch Instructions ++// Direct Branch. ++instruct branch(label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ match(Goto); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); ++ format %{ "j $lbl\t#@branch" %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} -+ -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode(riscv_enc_j(lbl)); + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_branch); +%} + -+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpL op1 zero)); ++// ============================================================================ ++// Compare and Branch Instructions + -+ effect(USE op1, USE lbl); ++// Patterns for short (< 12KiB) variants + -+ ins_cost(BRANCH_COST * 2); ++// Compare flags and branch near instructions. ++instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); + %} -+ + ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++// Compare signed int and branch near instructions ++instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ // Same match rule as `far_cmpI_branch'. 
++ match(If cmp (CmpI op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(If cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpI_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++// Compare unsigned int and branch near instructions ++instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpU_branch'. ++ match(If cmp (CmpU op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(If cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpU_loop'. 
++ match(CountedLoopEnd cmp (CmpU op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++// Compare signed long and branch near instructions ++instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpL_branch'. ++ match(If cmp (CmpL op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP op1 zero)); ++instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP op1 zero)); ++// Compare unsigned long and branch near instructions ++instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_branch'. ++ match(If cmp (CmpUL op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); ++instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_loop'. 
++ match(CountedLoopEnd cmp (CmpUL op1 op2)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(USE lbl); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpN op1 zero)); ++// Compare pointer and branch near instructions ++instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_branch'. ++ match(If cmp (CmpP op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP (DecodeN op1) zero)); ++instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} -+ -+ ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); -+ %} ++// Compare narrow pointer and branch near instructions ++instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_branch'. 
++ match(If cmp (CmpN op1 op2)); + -+ ins_pipe(pipe_cmpz_branch); -+%} ++ effect(USE lbl); + -+// ============================================================================ -+// Conditional Move Instructions -+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++ ins_cost(BRANCH_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++// Compare float and branch near instructions ++instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_branch'. ++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_loop'. 
++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++// Compare double and branch near instructions ++instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_branch'. ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_loop'. ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+ -+// ============================================================================ -+// Procedure Call/Return Instructions -+ -+// Call Java Static Instruction -+ -+instruct CallStaticJavaDirect(method meth) ++// Compare signed int with zero and branch near instructions ++instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallStaticJava); ++ // Same match rule as `far_cmpI_reg_imm0_branch'. 
++ match(If cmp (CmpI op1 zero)); + -+ effect(USE meth); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} + -+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} -+ -+ ins_encode( riscv_enc_java_static_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// TO HERE -+ -+// Call Java Dynamic Instruction -+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallDynamicJava); ++ // Same match rule as `far_cmpI_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + -+ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ins_cost(BRANCH_COST); + -+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} + -+ ins_encode( riscv_enc_java_dynamic_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Call Runtime Instruction -+ -+instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++// Compare unsigned int with zero and branch near instructions ++instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallRuntime); ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpU op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Call Runtime Instruction -+ -+instruct CallLeafDirect(method meth, rFlagsReg cr) ++instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallLeaf); ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); + -+ ins_pipe(pipe_class_call); -+%} ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+// Call Runtime Instruction ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} + -+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++// Compare signed long with zero and branch near instructions ++instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(CallLeafNoFP); ++ // Same match rule as `far_cmpL_reg_imm0_branch'. 
++ match(If cmp (CmpL op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// ============================================================================ -+// Partial Subtype Check -+// -+// superklass array for an instance of the superklass. Set a hidden -+// internal cache on a hit (cache is checked with exposed code in -+// gen_subtype_check()). Return zero for a hit. The encoding -+// ALSO sets flags. -+ -+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) ++instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(Set result (PartialSubtypeCheck sub super)); -+ effect(KILL temp, KILL cr); ++ // Same match rule as `far_cmpL_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ effect(USE op1, USE lbl); + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_cost(BRANCH_COST); + -+ opcode(0x1); // Force zero of result reg on hit ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, -+ immP0 zero, rFlagsReg cr) ++// Compare unsigned long with zero and branch near instructions ++instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); -+ effect(KILL temp, KILL result); -+ -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} -+ -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpUL op1 zero)); + -+ opcode(0x0); // Don't zero result reg on hit ++ effect(USE op1, USE lbl); + -+ ins_pipe(pipe_class_memory); -+%} ++ ins_cost(BRANCH_COST); + -+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UU); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -+ ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ effect(USE op1, USE lbl); + -+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UL); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, -+ rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// Compare pointer with zero and branch near instructions ++instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_branch'. 
++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++// Compare narrow pointer with zero and branch near instructions ++instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_branch'. 
++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ ins_cost(BRANCH_COST); + -+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++// Compare narrow pointer with pointer zero and branch near instructions ++instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++// Patterns for far (20KiB) variants + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + ++// Compare signed int and branch far instructions ++instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpI op1 op2)); ++ effect(USE lbl); + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ // the format instruction [far_b$cmp] here is be used as two insructions ++ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// clearing of an array -+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ effect(USE lbl); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ 
"ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} + + ins_encode %{ -+ address tpc = __ zero_words($base$$Register, $cnt$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL base, KILL cr); ++instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpU op1 op2)); ++ effect(USE lbl); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// ============================================================================ -+// Safepoint Instructions ++instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ effect(USE lbl); + -+instruct safePoint(iRegP poll) -+%{ -+ match(SafePoint poll); ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + -+ ins_cost(2 * LOAD_COST); -+ format %{ -+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" -+ %} + ins_encode %{ -+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_serial); // 
ins_pipe(iload_reg_mem); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// ============================================================================ -+// This name is KNOWN by the ADLC and cannot be changed. -+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -+// for this guy. -+instruct tlsLoadP(javaThread_RegP dst) ++instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) +%{ -+ match(Set dst (ThreadLocal)); ++ match(If cmp (CmpP op1 op2)); + -+ ins_cost(0); ++ effect(USE lbl); + -+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ ins_cost(BRANCH_COST * 2); + -+ size(0); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + -+ ins_encode( /*empty*/ ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_cmp_branch); +%} + -+// inlined locking and unlocking -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) +%{ -+ match(Set cr (FastLock object box)); -+ effect(TEMP tmp, TEMP tmp2); -+ -+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); -+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} -+ -+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ match(CountedLoopEnd cmp (CmpP op1 op2)); + -+ ins_pipe(pipe_serial); -+%} ++ effect(USE lbl); + -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastUnlock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ ins_cost(BRANCH_COST * 2); + -+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); -+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + -+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_cmp_branch); +%} + -+// Tail Call; Jump from runtime stub to Java code. -+// Also known as an 'interprocedural jump'. -+// Target of jump will eventually return to caller. -+// TailJump below removes the return address. -+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) +%{ -+ match(TailCall jump_target method_oop); ++ match(If cmp (CmpN op1 op2)); + -+ ins_cost(BRANCH_COST); ++ effect(USE lbl); + -+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." 
%} ++ ins_cost(BRANCH_COST * 2); + -+ ins_encode(riscv_enc_tail_call(jump_target)); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + -+ ins_pipe(pipe_class_call); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) +%{ -+ match(TailJump jump_target ex_oop); ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST + BRANCH_COST); ++ effect(USE lbl); + -+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ ins_cost(BRANCH_COST * 2); + -+ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + -+ ins_pipe(pipe_class_call); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// Create exception oop: created by stack-crawling runtime code. -+// Created exception is now available to this handler, and is setup -+// just prior to jumping to this handler. No code emitted. -+instruct CreateException(iRegP_R10 ex_oop) ++// Float compare and branch instructions ++instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) +%{ -+ match(Set ex_oop (CreateEx)); ++ match(If cmp (CmpF op1 op2)); + -+ ins_cost(0); -+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ effect(USE lbl); + -+ size(0); ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + -+ ins_encode( /*empty*/ ); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_class_compare); +%} + -+// Rethrow exception: The exception oop will come in the first -+// argument position. Then JUMP (not call) to the rethrow stub code. 
-+instruct RethrowException() ++instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) +%{ -+ match(Rethrow); -+ -+ ins_cost(BRANCH_COST); ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); + -+ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + -+ ins_encode( riscv_enc_rethrow() ); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_class_compare); +%} + -+// Return Instruction -+// epilog node loads ret address into ra as part of frame pop -+instruct Ret() ++// Double compare and branch instructions ++instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) +%{ -+ match(Return); ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST); -+ format %{ "ret\t// return register, #@Ret" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + -+ ins_encode(riscv_enc_ret()); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(pipe_class_compare); +%} + -+// Die now. -+instruct ShouldNotReachHere() %{ -+ match(Halt); -+ -+ ins_cost(BRANCH_COST); ++instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "#@ShouldNotReachHere" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ -+ if (is_reachable()) { -+ __ halt(); -+ } ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_class_compare); +%} + ++instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpI op1 zero)); + -+//----------PEEPHOLE RULES----------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+// -+// peepmatch ( root_instr_name [preceding_instruction]* ); -+// -+// peepconstraint %{ -+// (instruction_number.operand_name relational_op instruction_number.operand_name -+// [, ...] ); -+// // instruction numbers are zero-based using left to right order in peepmatch -+// -+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -+// // provide an instruction_number.operand_name for each operand that appears -+// // in the replacement instruction's match rule -+// -+// ---------VM FLAGS--------------------------------------------------------- -+// -+// All peephole optimizations can be turned off using -XX:-OptoPeephole -+// -+// Each peephole rule is given an identifying number starting with zero and -+// increasing by one in the order seen by the parser. An individual peephole -+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -+// on the command-line. 
-+// -+// ---------CURRENT LIMITATIONS---------------------------------------------- -+// -+// Only match adjacent instructions in same basic block -+// Only equality constraints -+// Only constraints between operands, not (0.dest_reg == RAX_enc) -+// Only one replacement instruction -+// -+//----------SMARTSPILL RULES--------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+ -+// Local Variables: -+// mode: c++ -+// End: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -new file mode 100644 -index 000000000..6f7055a39 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,605 @@ -+// -+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// ++ effect(USE op1, USE lbl); + -+// RISCV Bit-Manipulation Extension Architecture Description File ++ ins_cost(BRANCH_COST * 2); + -+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ -+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); -+ effect(DEF dst, USE src); -+ -+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ -+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); -+ effect(DEF dst, USE src); -+ -+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); -+ %} ++ effect(USE op1, USE lbl); + -+ ins_pipe(ialu_reg_shift); -+%} ++ ins_cost(BRANCH_COST * 2); + -+// ror expander -+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} + -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// ror expander -+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); + -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE op1, USE lbl); + ++ ins_cost(BRANCH_COST * 2); + -+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} + -+ expand %{ -+ rorI_reg_b(dst, src, shift); ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+%} -+ -+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); + -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); ++instruct 
far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE op1, USE lbl); + -+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); ++ ins_cost(BRANCH_COST * 2); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} + -+// rol expander -+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); + -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// rol expander -+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); + -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE op1, USE lbl); + -+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); ++ ins_cost(BRANCH_COST); + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} + -+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); ++instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE op1, USE lbl); + -+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); ++ ins_cost(BRANCH_COST); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} + -+// Convert oop into int for vectors alignment masking -+instruct convP2I_b(iRegINoSp dst, iRegP src) %{ -+ predicate(UseZba); -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ ins_pipe(pipe_cmpz_branch); ++%} + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct 
far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+// byte to int -+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// int to short -+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); -+ -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+// short to unsigned int -+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src mask)); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseZba); -+ match(Set dst (AndL (ConvI2L src) mask)); ++instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// BSWAP instructions -+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesI src)); ++instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ 
ins_pipe(pipe_cmpz_branch); +%} + -+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesL src)); ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ effect(USE op1, USE lbl); + -+ ins_encode %{ -+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++ ins_cost(BRANCH_COST); + -+ ins_pipe(ialu_reg); -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} + -+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesUS src)); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ ins_pipe(pipe_cmpz_branch); ++%} + -+ ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesS src)); ++ ins_cost(BRANCH_COST); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} + -+ ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Shift Add Pointer -+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL src2 imm))); ++instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Shift Add Long -+instruct 
shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL src2 imm))); ++instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Zeros Count instructions -+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosI src)); ++instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} + + ins_encode %{ -+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosL src)); ++instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} + + ins_encode %{ -+ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosI src)); ++// ============================================================================ ++// Conditional Move Instructions ++instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); 
+ -+ ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" ++ %} + + ins_encode %{ -+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosL src)); ++instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" ++ %} + + ins_encode %{ -+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Population Count instructions -+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountI src)); ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" ++ %} + + ins_encode %{ -+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Note: Long/bitCount(long) returns an int. 
-+instruct popCountL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountL src)); ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Max and Min -+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MinI src1 src2)); ++instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" ++ %} + + ins_encode %{ -+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MaxI src1 src2)); ++instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Abs -+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsI src)); ++instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST * 2); + format %{ -+ "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_b" ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" + %} + + ins_encode %{ -+ __ negw(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsL src)); ++instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ 
ins_cost(ALU_COST * 2); + format %{ -+ "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_b" ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" + %} + + ins_encode %{ -+ __ neg(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// And Not -+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src1 (XorI src2 m1))); ++// ============================================================================ ++// Procedure Call/Return Instructions + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) ++%{ ++ match(CallStaticJava); + -+ ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndL src1 (XorL src2 m1))); ++// TO HERE + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
++instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallDynamicJava); + -+ ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ++ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+// Or Not -+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI src1 (XorI src2 m1))); ++// Call Runtime Instruction + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallRuntime); + -+ ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL src1 (XorL src2 m1))); ++// Call Runtime Instruction + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++instruct CallLeafDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeaf); + -+ ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 000000000..905041890 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,1723 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// ++ ++// Call Runtime Instruction ++ ++instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeafNoFP); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// ============================================================================ ++// Partial Subtype Check +// ++// superklass array for an instance of the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). Return zero for a hit. The encoding ++// ALSO sets flags. ++ ++instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp, KILL cr); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} + -+// RISCV VEC Architecture Description File ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + -+opclass vmemA(indirect); ++ opcode(0x1); // Force zero of result reg on hit + -+source_hpp %{ -+ bool op_vec_supported(int opcode); ++ ins_pipe(pipe_class_memory); +%} + -+source %{ ++instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, ++ immP0 zero, rFlagsReg cr) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL tmp, KILL result); + -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} + -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + -+ static void loadStore(MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } -+ -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } ++ opcode(0x0); // Don't zero result reg on hit + ++ ins_pipe(pipe_class_memory); +%} + -+definitions %{ -+ int_def VEC_COST (200, 200); ++instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL 
tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); +%} + -+// All VEC instructions ++instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, ++ rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(MacroAssembler(&cbuf), false, dst_reg, -+ vector_element_basic_type(this), as_Register($mem$$base)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, 
iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(MacroAssembler(&cbuf), true, src_reg, -+ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector abs ++instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ 
format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector add ++instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), 
-+ as_VectorRegister($src2$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -+ ins_encode %{ -+ __ 
vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+// vector and ++// clearing of an array ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) ++%{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ address tpc = __ zero_words($base$$Register, $cnt$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector or ++instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector xor ++instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector float div ++instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector fmla ++instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); 
-+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// ============================================================================ ++// Safepoint Instructions + -+// vector fmls ++instruct safePoint(iRegP poll) ++%{ ++ match(SafePoint poll); + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ ins_cost(2 * LOAD_COST); ++ format %{ ++ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" + %} -+ ins_pipe(pipe_slow); -+%} -+ -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + -+// vector fnmla ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. 
++instruct tlsLoadP(javaThread_RegP dst) ++%{ ++ match(Set dst (ThreadLocal)); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(0); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} + -+// vector fnmls ++ size(0); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode( /*empty*/ ); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_empty); +%} + -+// vector mla ++// inlined locking and unlocking ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp1, TEMP tmp2); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ format %{ "fastlock $object,$box\t! 
kills $tmp1,$tmp2, #@cmpFastLock" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp1, TEMP tmp2); + -+// vector mls ++ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. 
++instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(BRANCH_COST); + -+// vector mul ++ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_tail_call(jump_target)); + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_call); +%} + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST + BRANCH_COST); + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// vector fneg ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException(iRegP_R10 ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); + -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(0); ++ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ size(0); + -+// popcount vector ++ ins_encode( /*empty*/ ); + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_empty); +%} + -+// vector add reduction ++// Rethrow exception: The exception oop will come in the first ++// argument position. Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ++ ins_encode(riscv_enc_rethrow()); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Return Instruction ++// epilog node loads ret address into ra as part of frame pop ++instruct Ret() ++%{ ++ match(Return); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "ret\t// return register, #@Ret" %} ++ ++ ins_encode(riscv_enc_ret()); ++ ++ ins_pipe(pipe_branch); +%} + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ 
"vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++// Die now. ++instruct ShouldNotReachHere() %{ ++ match(Halt); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "#@ShouldNotReachHere" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if (is_reachable()) { ++ __ halt(); ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ ++// Local Variables: ++// mode: c++ ++// End: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +new file mode 100644 +index 0000000000..7dda004cd3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -0,0 +1,466 @@ ++// ++// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// RISCV Bit-Manipulation Extension Architecture Description File ++ ++// Convert oop into int for vectors alignment masking ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++// byte to int ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++// int to short ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector replicate ++// short to unsigned int ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src mask)); ++ ++ format %{ "zext.h $dst, $src\t# 
s2ui, #@convS2UI_reg_reg_b" %} + -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// int to unsigned long (zero extend) ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} + -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// BSWAP instructions ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesI src)); + -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} + -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ 
vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+// vector shift -+ -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesL src)); + -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} + -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ 
-+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesUS src)); + -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + 
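++// The match rules below rely on the Zba/Zbb bit-manipulation extensions and are
++// guarded by UseZba/UseZbb (or UsePopCountInstruction) predicates, so they are
++// only selected when the corresponding extension is available. For example, the
++// shadd rules fold "dst = src1 + (src2 << imm)" into one Zba shift-add
++// (sh1add/sh2add/sh3add) rather than separate shift and add instructions.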
-+instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Shift Add Pointer ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL src2 imm))); + ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Shift Add Long ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL src2 imm))); + -+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if 
(con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} + -+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Zeros Count instructions ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosI src)); + -+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con < 
32) { -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sqrt ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountTrailingZerosI src)); + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sub ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst 
(CountTrailingZerosL src)); + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Population Count instructions ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// Note: Long/bitCount(long) returns an int. ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// Max and Min ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MinI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+ -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MaxI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); ++ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++// Abs ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsI src)); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -+ ins_encode %{ -+ // Count is in 8-bit 
bytes; non-Compact chars are 16 bits. -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "negw t0, $src\n\t" ++ "max $dst, $src, t0\t#@absI_reg_b" + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); ++ __ negw(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsL src)); + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "neg t0, $src\n\t" ++ "max $dst, $src, t0\t#@absL_reg_b" + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); ++ __ neg(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ 
match(Set dummy (StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++// And Not ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} + -+ format %{ "String Inflate $src,$dst" %} + ins_encode %{ -+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} + -+ format %{ "Encode array $src,$dst,$len -> $result" %} + ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_class_memory ); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++// Or Not ++instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} + ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (HasNegatives ary1 len)); -+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); -+ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ + ins_encode %{ -+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ orn(as_Register($dst$$reg), ++ 
as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); -+%} + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ ins_pipe(ialu_reg_reg); + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++%} + ++// AndI 0b0..010..0 + ConvI2B ++instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{ ++ predicate(UseZbs); ++ match(Set dst (Conv2B (AndI src mask))); ++ ins_cost(ALU_COST); ++ ++ format %{ "bexti $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %} + ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); ++ __ bexti($dst$$Register, $src$$Register, exact_log2((juint)($mask$$constant))); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} +\ No newline at end of file diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp new file mode 100644 -index 000000000..9922ff4cf +index 0000000000..f41a496093 --- /dev/null +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2738 @@ +@@ -0,0 +1,2666 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -40637,15 +38433,24 @@ index 000000000..9922ff4cf +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" @@ -40678,9 +38483,8 @@ index 000000000..9922ff4cf +}; + +class RegisterSaver { -+ const bool _save_vectors; + public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); @@ -40689,11 +38493,7 @@ index 000000000..9922ff4cf + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. 
--| -+ // |---v31--|----- -+ // |---f0---| ++ // |---f0---|<---SP + // |---f1---| + // | .. | + // |---f31--| @@ -40704,16 +38504,8 @@ index 000000000..9922ff4cf + // |---x31--| + // |---fp---| + // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; ++ return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + @@ -40723,7 +38515,7 @@ index 000000000..9922ff4cf + } + + int reg_offset_in_bytes(Register r) { -+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + @@ -40737,22 +38529,10 @@ index 000000000..9922ff4cf + RegisterImpl::max_slots_per_register * + BytesPerInt; + } -+ -+ // During deoptimization only the result registers need to be restored, -+ // all the other values have already been extracted. -+ void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif -+ ++ assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; @@ -40762,9 +38542,9 @@ index 000000000..9922ff4cf + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + -+ // Save Integer, Float and Vector registers. ++ // Save Integer and Float registers. + __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This @@ -40777,13 +38557,6 @@ index 000000000..9922ff4cf + + int sp_offset_in_slots = 0; + int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } -+ } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { @@ -40807,46 +38580,40 @@ index 000000000..9922ff4cf +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif ++ assert_cond(masm != NULL); ++ __ pop_CPU_state(); + __ leave(); +} + -+void RegisterSaver::restore_result_registers(MacroAssembler* masm) { -+ // Just restore result register. Only used by deoptimization. 
By -+ // now any callee save register that needs to be restored to a c2 -+ // caller of the deoptee has been extracted into the vframeArray -+ // and will be stuffed into the c2i adapter we create for later -+ // restoration so only result registers need to be restored here. -+ // Restore fp result register -+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); -+ -+ // Pop all of the register save are off the stack -+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); -+} -+ +// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. +bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; ++ return false; +} + +size_t SharedRuntime::trampoline_size() { -+ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction -+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr ++ return 6 * NativeInstruction::instruction_size; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli ++ __ movptr_with_offset(t0, destination, offset); + __ jalr(x0, t0, offset); +} + ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte @@ -40871,7 +38638,6 @@ index 000000000..9922ff4cf + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { -+ assert_cond(sig_bt != NULL && regs != NULL); + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { @@ -40945,6 +38711,7 @@ index 000000000..9922ff4cf + +// Patch the callers callsite with entry to compiled code if it exists. 
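A quick worked example of the slot arithmetic in the reg2offset_in/reg2offset_out helpers above may help; the sketch below is standalone C++, not HotSpot code, and the 4-byte stack slot size and zero out-preserve area are assumptions for illustration only.

// Standalone sketch (not part of the patch): byte offsets produced by the
// reg2offset_in/reg2offset_out pattern above for a value parked in a caller stack slot.
#include <cstdio>

static const int kStackSlotSize = 4;          // assumed VMRegImpl::stack_slot_size
static const int kOutPreserveStackSlots = 0;  // assumed SharedRuntime::out_preserve_stack_slots()

static int reg2offset_in_model(int slot)  { return slot * kStackSlotSize; }
static int reg2offset_out_model(int slot) { return (slot + kOutPreserveStackSlots) * kStackSlotSize; }

int main() {
  // Incoming slot 6 is read at fp + 24; the same value written outgoing lands at sp + 24.
  std::printf("in: %d bytes, out: %d bytes\n", reg2offset_in_model(6), reg2offset_out_model(6));
  return 0;
}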
+static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); + Label L; + __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, L); @@ -40966,6 +38733,7 @@ index 000000000..9922ff4cf + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); + __ jalr(x1, t0, offset); ++ + __ pop_CPU_state(); + // restore sp + __ leave(); @@ -41052,7 +38820,7 @@ index 000000000..9922ff4cf + __ sd(t0, Address(sp, next_off), /*temp register*/esp); +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaaaul); ++ __ li(t0, 0xdeadffffdeadaaaaul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + } else { @@ -41068,10 +38836,10 @@ index 000000000..9922ff4cf + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // jlong/double in gpr ++ // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaabul); ++ __ li(t0, 0xdeadffffdeadaaabul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ sd(r, Address(sp, next_off)); @@ -41087,7 +38855,7 @@ index 000000000..9922ff4cf + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaacul); ++ __ li(t0, 0xdeadffffdeadaaacul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); @@ -41265,7 +39033,6 @@ index 000000000..9922ff4cf + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on riscv"); -+ assert_cond(sig_bt != NULL && regs != NULL); + + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. @@ -41343,7 +39110,190 @@ index 000000000..9922ff4cf + return stk_args; +} + ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); ++ ++ // See if oop is NULL if it is we need no handle ++ ++ if (src.first()->is_stack()) { ++ ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { ++ ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } ++ ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } ++ } ++ ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
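The handle passing that object_move above arranges can be pictured with a small standalone model; the names below are made up for illustration, the register/stack distinctions are ignored, and only the null-versus-non-null rule is shown.

// Minimal model (not JVM code): a JNI handle is the address of the slot that
// holds the oop, or NULL when the oop itself is NULL.
#include <cstdio>
#include <cstdint>

typedef intptr_t oop_t;      // stand-in for an oop
typedef oop_t*   handle_t;   // stand-in for the jobject handed to native code

static handle_t to_handle(oop_t* slot) {
  return (*slot == 0) ? (handle_t)0 : slot;   // same conditional-NULL rule as above
}

int main() {
  oop_t reserved_slot = 0x1000;   // oop spilled into the reserved handle area
  oop_t null_slot     = 0;
  std::printf("non-null -> %p, null -> %p\n",
              (void*)to_handle(&reserved_slot), (void*)to_handle(&null_slot));
  return 0;
}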
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41361,6 +39311,7 @@ index 000000000..9922ff4cf +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41378,6 +39329,7 @@ index 000000000..9922ff4cf +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if 
(args[i].first()->is_Register()) { @@ -41391,6 +39343,7 @@ index 000000000..9922ff4cf +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { @@ -41410,85 +39363,17 @@ index 000000000..9922ff4cf + } +} + -+// Check GCLocker::needs_gc and enter the runtime if it's true. This -+// keeps a new JNI critical region from starting until a GC has been -+// forced. Save down any oops in registers and describe them in an -+// OopMap. -+static void check_needs_gc_for_critical_native(MacroAssembler* masm, -+ int stack_slots, -+ int total_c_args, -+ int total_in_args, -+ int arg_save_area, -+ OopMapSet* oop_maps, -+ VMRegPair* in_regs, -+ BasicType* in_sig_bt) { Unimplemented(); } -+ -+// Unpack an array argument into a pointer to the body and the length -+// if the array is non-null, otherwise pass 0 for both. -+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } -+ -+class ComputeMoveOrder: public StackObj { -+ class MoveOperation: public ResourceObj { -+ friend class ComputeMoveOrder; -+ private: -+ VMRegPair _src; -+ VMRegPair _dst; -+ int _src_index; -+ int _dst_index; -+ bool _processed; -+ MoveOperation* _next; -+ MoveOperation* _prev; -+ -+ static int get_id(VMRegPair r) { Unimplemented(); return 0; } -+ -+ public: -+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): -+ _src(src) -+ , _dst(dst) -+ , _src_index(src_index) -+ , _dst_index(dst_index) -+ , _processed(false) -+ , _next(NULL) -+ , _prev(NULL) { Unimplemented(); } -+ -+ ~MoveOperation() { -+ _next = NULL; -+ _prev = NULL; -+ } -+ -+ VMRegPair src() const { Unimplemented(); return _src; } -+ int src_id() const { Unimplemented(); return 0; } -+ int src_index() const { Unimplemented(); return 0; } -+ VMRegPair dst() const { Unimplemented(); return _src; } -+ void set_dst(int i, VMRegPair dst) { Unimplemented(); } -+ int dst_index() const { Unimplemented(); return 0; } -+ int dst_id() const { Unimplemented(); return 0; } -+ MoveOperation* next() const { Unimplemented(); return 0; } -+ MoveOperation* prev() const { Unimplemented(); return 0; } -+ void set_processed() { Unimplemented(); } -+ bool is_processed() const { Unimplemented(); return 0; } -+ -+ // insert -+ void break_cycle(VMRegPair temp_register) { Unimplemented(); } -+ -+ void link(GrowableArray& killer) { Unimplemented(); } -+ }; -+ -+ private: -+ GrowableArray edges; -+ -+ public: -+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, -+ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } -+ -+ ~ComputeMoveOrder() {} -+ // Collected all the move operations -+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } -+ -+ // Walk the edges breaking cycles between moves. 
The result list -+ // can be walked in order to produce the proper set of loads -+ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -+}; ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, @@ -41630,12 +39515,7 @@ index 000000000..9922ff4cf + in_ByteSize(-1), + (OopMapSet*)NULL); + } -+ bool is_critical_native = true; -+ address native_func = critical_entry; -+ if (native_func == NULL) { -+ native_func = method->native_function(); -+ is_critical_native = false; -+ } ++ address native_func = method->native_function(); + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) @@ -41650,70 +39530,20 @@ index 000000000..9922ff4cf + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args; -+ if (!is_critical_native) { -+ total_c_args += 1; -+ if (method->is_static()) { -+ total_c_args++; -+ } -+ } else { -+ for (int i = 0; i < total_in_args; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ total_c_args++; -+ } -+ } -+ } ++ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ assert_cond(out_sig_bt != NULL && out_regs != NULL); + BasicType* in_elem_bt = NULL; + + int argc = 0; -+ if (!is_critical_native) { -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } -+ } else { -+ Thread* THREAD = Thread::current(); -+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); -+ assert_cond(in_elem_bt != NULL); -+ SignatureStream ss(method->signature()); -+ for (int i = 0; i < total_in_args ; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ // Arrays are passed as int, elem* pair -+ out_sig_bt[argc++] = T_INT; -+ out_sig_bt[argc++] = T_ADDRESS; -+ Symbol* atype = ss.as_symbol(CHECK_NULL); -+ const char* at = atype->as_C_string(); -+ if (strlen(at) == 2) { -+ assert(at[0] == '[', "must be"); -+ switch (at[1]) { -+ case 'B': in_elem_bt[i] = T_BYTE; break; -+ case 'C': in_elem_bt[i] = T_CHAR; break; -+ case 'D': in_elem_bt[i] = T_DOUBLE; break; -+ case 'F': in_elem_bt[i] = T_FLOAT; break; -+ case 'I': in_elem_bt[i] = T_INT; break; -+ case 'J': in_elem_bt[i] = T_LONG; break; -+ case 'S': in_elem_bt[i] = T_SHORT; break; -+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ } else { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ in_elem_bt[i] = T_VOID; -+ } -+ if (in_sig_bt[i] != T_VOID) { -+ assert(in_sig_bt[i] == ss.type(), "must match"); -+ ss.next(); -+ } -+ } ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; + } + + // Now figure out where the args must be stored and how much stack space @@ -41730,34 +39560,6 @@ index 000000000..9922ff4cf + + // Now the space for the inbound oop handle area + int total_save_slots = 8 * 
VMRegImpl::slots_per_word; // 8 arguments passed in registers -+ if (is_critical_native) { -+ // Critical natives may have to call out so they need a save area -+ // for register arguments. -+ int double_slots = 0; -+ int single_slots = 0; -+ for ( int i = 0; i < total_in_args; i++) { -+ if (in_regs[i].first()->is_Register()) { -+ const Register reg = in_regs[i].first()->as_Register(); -+ switch (in_sig_bt[i]) { -+ case T_BOOLEAN: -+ case T_BYTE: -+ case T_SHORT: -+ case T_CHAR: -+ case T_INT: single_slots++; break; -+ case T_ARRAY: // specific to LP64 (7145024) -+ case T_LONG: double_slots++; break; -+ default: ShouldNotReachHere(); -+ } -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ ShouldNotReachHere(); -+ } -+ } -+ total_save_slots = double_slots * 2 + single_slots; -+ // align the save area -+ if (double_slots != 0) { -+ stack_slots = align_up(stack_slots, 2); -+ } -+ } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; @@ -41849,11 +39651,7 @@ index 000000000..9922ff4cf + __ nop(); + + // Generate stack overflow check -+ if (UseStackBanging) { -+ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); -+ } else { -+ Unimplemented(); -+ } ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); @@ -41868,11 +39666,6 @@ index 000000000..9922ff4cf + + const Register oop_handle_reg = x18; + -+ if (is_critical_native) { -+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, -+ oop_handle_offset, oop_maps, in_regs, in_sig_bt); -+ } -+ + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have @@ -41917,22 +39710,14 @@ index 000000000..9922ff4cf + +#endif /* ASSERT */ + -+ // This may iterate in two different directions depending on the -+ // kind of native it is. The reason is that for regular JNI natives -+ // the incoming and outgoing registers are offset upwards and for -+ // critical natives they are offset down. ++ // For JNI natives the incoming and outgoing registers are offset upwards. 
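The index pairing built just below is easiest to see with concrete numbers; the following standalone snippet replays the same loop for a hypothetical static native with three Java parameters (the JNIEnv* and class mirror account for the two extra C slots, per total_c_args above).

// Standalone illustration of the (java index -> C index) pairs pushed below.
#include <cstdio>
#include <vector>

int main() {
  const int total_in_args = 3;                  // hypothetical Java parameter count
  const int total_c_args  = total_in_args + 2;  // static: + JNIEnv* + class mirror
  std::vector<int> arg_order;
  for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
    arg_order.push_back(i);
    arg_order.push_back(c_arg);
  }
  // Prints "2->4 1->3 0->2": each C slot sits above its Java source, so walking
  // from the last argument down never overwrites a register that is still pending.
  for (size_t k = 0; k < arg_order.size(); k += 2) {
    std::printf("%d->%d ", arg_order[k], arg_order[k + 1]);
  }
  std::printf("\n");
  return 0;
}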
+ GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(x9->as_VMReg()); + -+ if (!is_critical_native) { -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } -+ } else { -+ // Compute a valid move order, using tmp_vmreg to break any cycles -+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); + } + + int temploc = -1; @@ -41940,20 +39725,7 @@ index 000000000..9922ff4cf + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ if (c_arg == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // This arg needs to be moved to a temporary -+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); -+ in_regs[i] = tmp_vmreg; -+ temploc = i; -+ continue; -+ } else if (i == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // Read from the temporary location -+ assert(temploc != -1, "must be valid"); -+ i = temploc; -+ temploc = -1; -+ } ++ assert(c_arg != -1 && i != -1, "wrong order"); +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -41968,32 +39740,17 @@ index 000000000..9922ff4cf +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: -+ if (is_critical_native) { -+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); -+ c_arg++; -+#ifdef ASSERT -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif -+ int_args++; -+ break; -+ } -+ // no break + case T_OBJECT: -+ assert(!is_critical_native, "no oop arguments"); -+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); + int_args++; + break; + case T_VOID: + break; + + case T_FLOAT: -+ __ float_move(in_regs[i], out_regs[c_arg]); ++ float_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + @@ -42001,12 +39758,12 @@ index 000000000..9922ff4cf + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ __ double_move(in_regs[i], out_regs[c_arg]); ++ double_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_LONG : -+ __ long_move(in_regs[i], out_regs[c_arg]); ++ long_move(masm, in_regs[i], out_regs[c_arg]); + int_args++; + break; + @@ -42015,7 +39772,7 @@ index 000000000..9922ff4cf + break; + + default: -+ __ move32_64(in_regs[i], out_regs[c_arg]); ++ move32_64(masm, in_regs[i], out_regs[c_arg]); + int_args++; + } + } @@ -42025,7 +39782,7 @@ index 000000000..9922ff4cf + int c_arg = total_c_args - total_in_args; + + // Pre-load a static method's oop into c_rarg1. 
-+ if (method->is_static() && !is_critical_native) { ++ if (method->is_static()) { + + // load oop into a register + __ movoop(c_rarg1, @@ -42084,7 +39841,6 @@ index 000000000..9922ff4cf + Label lock_done; + + if (method->is_synchronized()) { -+ assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + @@ -42132,7 +39888,6 @@ index 000000000..9922ff4cf + __ bnez(swap_reg, slow_path_lock); + + // Slow path will re-enter here -+ + __ bind(lock_done); + } + @@ -42140,9 +39895,7 @@ index 000000000..9922ff4cf + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native -+ if (!is_critical_native) { -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); -+ } ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + + // Now set thread in native + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); @@ -42150,7 +39903,7 @@ index 000000000..9922ff4cf + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + -+ __ rt_call(native_func); ++ rt_call(masm, native_func); + + __ bind(native_return); + @@ -42158,10 +39911,13 @@ index 000000000..9922ff4cf + oop_maps->add_gc_map(return_pc - start, map); + + // Unpack native results. -+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { + __ cast_primitive_type(ret_type, x10); + } + ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; ++ + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -42171,29 +39927,12 @@ index 000000000..9922ff4cf + // didn't see any synchronization is progress, and escapes. + __ mv(t0, _thread_in_native_trans); + -+ if(os::is_MP()) { -+ if (UseMembar) { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); -+ -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, x12, t0); -+ } -+ } else { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests -+ Label safepoint_in_progress, safepoint_in_progress_done; + { + __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); @@ -42202,7 +39941,6 @@ index 000000000..9922ff4cf + } + + // change thread state -+ Label after_transition; + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_Java); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); @@ -42233,7 +39971,6 @@ index 000000000..9922ff4cf + } + + // Simple recursive lock? 
-+ + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); + @@ -42242,7 +39979,6 @@ index 000000000..9922ff4cf + save_native_result(masm, ret_type, stack_slots); + } + -+ + // get address of the stack lock + __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header @@ -42274,7 +40010,7 @@ index 000000000..9922ff4cf + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ if (is_reference_type(ret_type)) { + __ resolve_jobject(x10, xthread, t1); + } + @@ -42283,32 +40019,26 @@ index 000000000..9922ff4cf + __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + -+ if (!is_critical_native) { -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ } ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + + __ leave(); + -+ if (!is_critical_native) { -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); -+ } ++ // Any exception pending? ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + + // We're done + __ ret(); + + // Unexpected paths are out of line and go here + -+ if (!is_critical_native) { -+ // forward the exception -+ __ bind(exception_pending); ++ // forward the exception ++ __ bind(exception_pending); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ } ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // Slow path locking & unlocking + if (method->is_synchronized()) { @@ -42345,7 +40075,7 @@ index 000000000..9922ff4cf + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + save_native_result(masm, ret_type, stack_slots); + } + @@ -42358,7 +40088,7 @@ index 000000000..9922ff4cf + __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + +#ifdef ASSERT + { @@ -42372,7 +40102,7 @@ index 000000000..9922ff4cf + + __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + restore_native_result(masm, ret_type, stack_slots); + } + __ j(unlock_done); @@ -42385,7 +40115,7 @@ index 000000000..9922ff4cf + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ j(reguard_done); @@ -42404,21 +40134,12 @@ index 000000000..9922ff4cf + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + int32_t offset = 0; -+ 
if (!is_critical_native) { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); -+ } else { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); -+ } ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); + __ jalr(x1, t0, offset); ++ + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + -+ if (is_critical_native) { -+ // The call above performed the transition to thread_in_Java so -+ // skip the transition logic above. -+ __ j(after_transition); -+ } -+ + __ j(safepoint_in_progress_done); + __ block_comment("} safepoint"); + } @@ -42466,10 +40187,6 @@ index 000000000..9922ff4cf + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + assert(nm != NULL, "create native nmethod fail!"); -+ if (is_critical_native) { -+ nm->set_lazy_critical_native(true); -+ } -+ + return nm; +} + @@ -42498,7 +40215,7 @@ index 000000000..9922ff4cf + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ RegisterSaver reg_saver; + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return @@ -42590,7 +40307,7 @@ index 000000000..9922ff4cf + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. -+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread @@ -42651,7 +40368,7 @@ index 000000000..9922ff4cf + + __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + Label noException; -+ __ mv(t0, Deoptimization::Unpack_exception); ++ __ li(t0, Deoptimization::Unpack_exception); + __ bne(xcpool, t0, noException); // Was exception pending? + __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); + __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); @@ -42668,7 +40385,14 @@ index 000000000..9922ff4cf + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. -+ reg_saver.restore_result_registers(masm); ++ ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + + // All of the register save area has been popped of the stack. Only the + // return address remains. @@ -42697,10 +40421,8 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. 
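For readers unfamiliar with the term, stack banging means probing one word per page across the space the new interpreter frames will need, which forces any stack overflow to surface at a well-defined point; a rough standalone model follows (4 KiB pages assumed, nothing here is HotSpot code).

// Rough model of stack banging: touch one word per page over the frame size.
#include <cstddef>

static void bang_stack_model(volatile char* sp, size_t frame_bytes, size_t page = 4096) {
  for (size_t off = page; off <= frame_bytes + page; off += page) {
    *(sp - off) = 0;   // would fault on the guard page if the frames don't fit
  }
}

int main() {
  static char fake_stack[64 * 1024];
  bang_stack_model(fake_stack + sizeof(fake_stack), 16 * 1024);  // probes 5 pages downward
  return 0;
}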
-+ if (UseStackBanging) { -+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+ } ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); +#endif + // Load address of array of frame pcs into x12 + __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -42725,7 +40447,7 @@ index 000000000..9922ff4cf + __ sub(sp, sp, x9); + + // Push interpreter frames in a loop -+ __ mv(t0, (uint64_t)0xDEADDEAD); // Make a recognizable pattern ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern + __ mv(t1, t0); + Label loop; + __ bind(loop); @@ -42775,7 +40497,7 @@ index 000000000..9922ff4cf + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, -+ new OopMap( frame_size_in_words, 0 )); ++ new OopMap(frame_size_in_words, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); @@ -42901,12 +40623,10 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. -+ if (UseStackBanging) { -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+ } ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); +#endif + + // Load address of array of frame pcs into x12 (address*) @@ -43019,7 +40739,7 @@ index 000000000..9922ff4cf + address call_pc = NULL; + int frame_size_in_words = -1; + bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ RegisterSaver reg_saver; + + // Save Integer and Float registers. + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); @@ -43128,7 +40848,7 @@ index 000000000..9922ff4cf +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; @@ -43138,7 +40858,7 @@ index 000000000..9922ff4cf + assert_cond(masm != NULL); + + int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ RegisterSaver reg_saver; + + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); @@ -43290,6 +41010,10 @@ index 000000000..9922ff4cf + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); + __ jalr(x1, t0, offset); + ++ ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. ++ + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. 
+ // Callee-saved registers will be the same as the frame above (i.e., @@ -43345,14 +41069,14 @@ index 000000000..9922ff4cf +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp new file mode 100644 -index 000000000..c5b3b094c +index 0000000000..9970229c5c --- /dev/null +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -0,0 +1,3743 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -43378,9 +41102,11 @@ index 000000000..c5b3b094c +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" @@ -43397,7 +41123,9 @@ index 000000000..c5b3b094c +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif -+ ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure @@ -43505,8 +41233,8 @@ index 000000000..c5b3b094c + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) + + // Call stub stack layout word offsets from fp @@ -43539,15 +41267,15 @@ index 000000000..c5b3b094c + x9_off = -11, + + call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, + }; + + address generate_call_stub(address& return_address) { @@ -43701,13 +41429,13 @@ index 000000000..c5b3b094c + __ ld(j_rarg2, result); + Label is_long, is_float, is_double, exit; + __ ld(j_rarg1, result_type); -+ __ mv(t0, (u1)T_OBJECT); ++ __ li(t0, (u1)T_OBJECT); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_LONG); ++ __ li(t0, (u1)T_LONG); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_FLOAT); ++ __ li(t0, (u1)T_FLOAT); + __ beq(j_rarg1, t0, is_float); -+ __ mv(t0, (u1)T_DOUBLE); ++ __ li(t0, (u1)T_DOUBLE); + __ beq(j_rarg1, t0, is_double); + + // handle T_INT case @@ -43945,7 +41673,7 @@ index 000000000..c5b3b094c + + Label exit, error; + -+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + + __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ld(c_rarg3, Address(c_rarg2)); @@ -43961,7 +41689,7 @@ index 000000000..c5b3b094c + __ andr(c_rarg2, x10, c_rarg3); + __ mv(c_rarg3, (intptr_t) 
Universe::verify_oop_bits()); + -+ // Compare c_rarg2 and c_rarg3 ++ // Compare c_rarg2 and c_rarg3. + __ bne(c_rarg2, c_rarg3, error); + + // make sure klass is 'reasonable', which is not zero. @@ -43971,16 +41699,15 @@ index 000000000..c5b3b094c + // return if everything seems ok + __ bind(exit); + -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + __ ret(); + + // handle errors + __ bind(error); -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ push_reg(RegSet::range(x0, x31), sp); -+ // prepare parameters for debug64, c_rarg0: address of error message, -+ // c_rarg1: return address, c_rarg2: address of regs on stack ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) + __ mv(c_rarg0, t0); // pass address of error message + __ mv(c_rarg1, ra); // pass return address + __ mv(c_rarg2, sp); // pass address of regs on stack @@ -43991,6 +41718,7 @@ index 000000000..c5b3b094c + int32_t offset = 0; + __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); + __ jalr(x1, t0, offset); ++ __ ebreak(); + + return start; + } @@ -44036,42 +41764,276 @@ index 000000000..c5b3b094c + return start; + } + -+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; ++ ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; ++ ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ ++ const Register stride = x30; ++ ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); ++ ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); ++ ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } ++ ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ void copy_by_step(RegSet tmp_regs, Register src, Register dst, -+ unsigned unroll_factor, int unit) { -+ unsigned char regs[32]; -+ int offset = unit < 0 ? 
unit : 0; ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ // Scan bitset to get tmp regs -+ unsigned int regsSize = 0; -+ unsigned bitset = tmp_regs.bits(); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); ++ ++ __ sub(count, count, 16); ++ __ bltz(count, drain); ++ ++ __ bind(again); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); ++ ++ __ sub(count, count, 8); ++ __ bgez(count, again); ++ ++ // Drain ++ __ bind(drain); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); -+ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); + -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[regsSize++] = reg; ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); ++ ++ __ bind(L1); ++ ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); + } -+ bitset <<= 1; ++ ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } ++ ++ __ ret(); ++ } ++ ++ Label copy_f, copy_b; ++ ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. 
If is_aligned is false, we align the source address. ++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ ++ ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); ++ ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; ++ ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); ++ ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } ++ ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); ++ ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); ++ ++ if (is_backward) { ++ __ j(done); ++ ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } ++ ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); + } + ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; ++ ++ Label same_aligned; ++ Label copy8, copy_small, done; ++ + copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (abs(unit)) { ++ switch (granularity) { + case 1 : + ld_arr = (copy_insn)&MacroAssembler::lbu; + st_arr = (copy_insn)&MacroAssembler::sb; + break; -+ case BytesPerShort : ++ case 2 : + ld_arr = (copy_insn)&MacroAssembler::lhu; + st_arr = (copy_insn)&MacroAssembler::sh; + break; -+ case BytesPerInt : ++ case 4 : + ld_arr = (copy_insn)&MacroAssembler::lwu; + st_arr = (copy_insn)&MacroAssembler::sw; + break; -+ case BytesPerLong : ++ case 8 : + ld_arr = (copy_insn)&MacroAssembler::ld; + st_arr = (copy_insn)&MacroAssembler::sd; + break; @@ -44079,294 +42041,83 @@ index 000000000..c5b3b094c + ShouldNotReachHere(); + } + -+ for (unsigned i = 0; i < unroll_factor; i++) { -+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); -+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); + } + -+ __ addi(src, src, unroll_factor * unit); -+ __ addi(dst, dst, unroll_factor * unit); -+ } -+ -+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int ele_size, unsigned align_unit) { -+ bool is_backwards = ele_size < 0; -+ size_t granularity = uabs(ele_size); -+ for 
(unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { -+ int offset = is_backwards ? (int)(-unit) : unit; -+ Label exit; -+ __ andi(tmp, count_in_bytes, unit); -+ __ beqz(tmp, exit); -+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); -+ __ bind(exit); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); + } -+ } + -+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int step, Label *Lcopy_small, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ RegSet tmp_regs = RegSet::range(x13, x16); -+ assert_different_registers(src, dst, count_in_bytes, tmp); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ Label loop, copy2, copy1, finish; -+ if (loopsize == noreg) { -+ loopsize = t1; -+ __ mv(loopsize, 8 * granularity); -+ } ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ // Cyclic copy with 8*step. -+ __ bind(loop); -+ { -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); -+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); -+ __ bge(count_in_bytes, loopsize, loop); ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); + } -+ -+ if (Lcopy_small != NULL) { -+ __ bind(*Lcopy_small); ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); + } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ // copy memory smaller than step * 8 bytes -+ __ andi(tmp, count_in_bytes, granularity << 2); -+ __ beqz(tmp, copy2); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); -+ -+ __ bind(copy2); -+ __ andi(tmp, count_in_bytes, granularity << 1); -+ __ beqz(tmp, copy1); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); -+ -+ __ bind(copy1); -+ __ andi(tmp, count_in_bytes, granularity); -+ __ beqz(tmp, finish); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); -+ -+ __ bind(finish); -+ } -+ -+ // Cyclic copy with one step. -+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ Label loop1; -+ if (loopsize == noreg) { -+ loopsize = t0; -+ __ mv(loopsize, granularity); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); + } -+ -+ __ bind(loop1); -+ { -+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); -+ __ sub(count_in_bytes, count_in_bytes, granularity); -+ __ bge(count_in_bytes, loopsize, loop1); -+ } -+ } -+ -+ void align_unit(Register src, Register dst, Register count_in_bytes, -+ unsigned unit, bool is_backwards) { -+ Label skip; -+ __ andi(t0, dst, unit); -+ __ beqz(t0, skip); -+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? 
-unit : unit); -+ __ sub(count_in_bytes, count_in_bytes, unit); -+ __ bind(skip); -+ } -+ -+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, -+ Register tmp, int ele_step) { -+ -+ bool is_backwards = ele_step < 0; -+ unsigned int granularity = uabs(ele_step); -+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; -+ const Register count_in_bytes = x31, src = x28, dst = x29; -+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); -+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); -+ __ add(src, s, is_backwards ? count_in_bytes : zr); -+ __ add(dst, d, is_backwards ? count_in_bytes : zr); -+ -+ // if count_in_elements < 8, copy_small -+ __ mv(t0, 8); -+ if (is_align && granularity < BytesPerLong) { -+ __ blt(count_in_bytes, t0, Lcopy_small); -+ } else { -+ __ blt(count_in_elements, t0, Lcopy_small); -+ } -+ -+ if (granularity < BytesPerLong) { -+ Label Lcopy_aligned[3]; -+ Label Lalign8; -+ if (!is_align) { -+ Label Lalign_and_copy; -+ __ mv(t0, EagerArrayCopyThreshold); -+ __ blt(count_in_bytes, t0, Lalign_and_copy); -+ // Align dst to 8. -+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ -+ Register shr = x30, shl = x7, tmp1 = x13; -+ -+ __ andi(shr, src, 0x7); -+ __ beqz(shr, Lalign8); -+ { -+ // calculaute the shift for store doubleword -+ __ slli(shr, shr, 3); -+ __ sub(shl, shr, 64); -+ __ sub(shl, zr, shl); -+ -+ // alsrc: previous position of src octal alignment -+ Register alsrc = t1; -+ __ andi(alsrc, src, -8); -+ -+ // move src to tail -+ __ andi(t0, count_in_bytes, -8); -+ if (is_backwards) { -+ __ sub(src, src, t0); -+ } else { -+ __ add(src, src, t0); -+ } -+ -+ // prepare for copy_dstaligned_loop -+ __ ld(tmp1, alsrc, 0); -+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); -+ __ mv(x17, 8); -+ __ blt(count_in_bytes, x17, Lcopy_small); -+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); -+ __ j(Lcopy_small); -+ } -+ __ j(Ldone); -+ __ bind(Lalign_and_copy); -+ -+ // Check src and dst could be 8/4/2 algined at the same time. If could, align the -+ // memory and copy by 8/4/2. -+ __ xorr(t1, src, dst); -+ -+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { -+ Label skip; -+ unsigned int unit = alignment >> 1; -+ // Check src and dst could be aligned to checkbyte at the same time -+ // if copy from src to dst. If couldn't, jump to label not_aligned. -+ __ andi(t0, t1, alignment - 1); -+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); -+ // Align src and dst to unit. -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ } -+ __ bind(Lalign8); -+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { -+ // Copy memory by steps, which has been aligned to step_size. -+ Label loop8, Ltail; -+ int step = is_backwards ? (-step_size) : step_size; -+ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before -+ // Check whether the memory size is smaller than step_size. -+ __ mv(t0, step_size); -+ __ blt(count_in_bytes, t0, Ltail); -+ } -+ const Register eight_step = t1; -+ __ mv(eight_step, step_size * 8); -+ __ bge(count_in_bytes, eight_step, loop8); -+ // If memory is less than 8*step_size bytes, loop by step. 
-+ copy_loop1(src, dst, count_in_bytes, step, t0); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(loop8); -+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. -+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); -+ __ bind(Ltail); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); -+ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); + } -+ // If the ele_step is greater than 8, or the memory src and dst cannot -+ // be aligned with a number greater than the value of step. -+ // Cyclic copy with 8*ele_step. -+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ __ bind(Ldone); -+ } ++ __ beqz(cnt, done); + -+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; -+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, -+ srl_reg1 = is_backwards ? tmp2 : tmp1, -+ sll_reg2 = is_backwards ? tmp2 : tmp3, -+ srl_reg2 = is_backwards ? tmp3 : tmp2, -+ sll_reg3 = is_backwards ? tmp3 : tmp4, -+ srl_reg3 = is_backwards ? tmp4 : tmp3, -+ sll_reg4 = is_backwards ? tmp4 : tmp1, -+ srl_reg4 = is_backwards ? tmp1 : tmp4; -+ assert_different_registers(t0, thirty_two, alsrc, shr, shl); -+ int unit = is_backwards ? -wordSize : wordSize; -+ int offset = is_backwards ? -wordSize : 0; -+ Label loop, done; -+ -+ __ mv(thirty_two, 32); -+ __ blt(count_in_bytes, thirty_two, done); -+ -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg1, shl); -+ __ srl(tmp1, srl_reg1, shr); -+ __ orr(tmp1, tmp1, t0); -+ __ sd(tmp1, dst, offset); -+ -+ __ ld(tmp3, alsrc, unit * 2); -+ __ sll(t0, sll_reg2, shl); -+ __ srl(tmp2, srl_reg2, shr); -+ __ orr(tmp2, tmp2, t0); -+ __ sd(tmp2, dst, unit + offset); -+ -+ __ ld(tmp4, alsrc, unit * 3); -+ __ sll(t0, sll_reg3, shl); -+ __ srl(tmp3, srl_reg3, shr); -+ __ orr(tmp3, tmp3, t0); -+ __ sd(tmp3, dst, unit * 2 + offset); -+ -+ __ ld(tmp1, alsrc, unit * 4); -+ __ sll(t0, sll_reg4, shl); -+ __ srl(tmp4, srl_reg4, shr); -+ __ orr(tmp4, tmp4, t0); -+ __ sd(tmp4, dst, unit * 3 + offset); -+ -+ __ add(alsrc, alsrc, unit * 4); -+ __ add(dst, dst, unit * 4); -+ __ sub(count_in_bytes, count_in_bytes, 32); -+ __ bge(count_in_bytes, thirty_two, loop); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + + __ bind(done); + } + -+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, Register eight, -+ bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; -+ const Register sll_reg = is_backwards ? tmp1 : tmp2, -+ srl_reg = is_backwards ? tmp2 : tmp1; -+ assert_different_registers(t0, eight, alsrc, shr, shl); -+ Label loop; -+ int unit = is_backwards ? 
-wordSize : wordSize; -+ -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg, shl); -+ __ srl(tmp1, srl_reg, shr); -+ __ orr(t0, tmp1, t0); -+ __ sd(t0, dst, is_backwards ? unit : 0); -+ __ mv(tmp1, tmp2); -+ __ add(alsrc, alsrc, unit); -+ __ add(dst, dst, unit); -+ __ sub(count_in_bytes, count_in_bytes, 8); -+ __ bge(count_in_bytes, eight, loop); -+ } -+ + // Scan over array at a for count oops, verifying each one. + // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(int size, Register a, Register count, Register temp) { ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { + Label loop, end; + __ mv(t1, zr); + __ slli(t0, count, exact_log2(size)); @@ -44374,7 +42125,7 @@ index 000000000..c5b3b094c + __ bgeu(t1, t0, end); + + __ add(temp, a, t1); -+ if (size == wordSize) { ++ if (size == (size_t)wordSize) { + __ ld(temp, Address(temp, 0)); + __ verify_oop(temp); + } else { @@ -44399,7 +42150,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44435,16 +42186,17 @@ index 000000000..c5b3b094c + // save regs before copy_memory + __ push_reg(RegSet::of(d, count), sp); + } -+ copy_memory(aligned, s, d, count, t0, checked_cast(size)); ++ ++ copy_memory(aligned, s, d, count, t0, size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { -+ verify_oop_array(checked_cast(size), d, count, t2); ++ verify_oop_array(size, d, count, t2); + } + } + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + + __ leave(); + __ mv(x10, zr); // return 0 @@ -44465,9 +42217,9 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // -+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address* entry, const char* name, + bool dest_uninitialized = false) { + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; @@ -44504,13 +42256,14 @@ index 000000000..c5b3b094c + } + + copy_memory(aligned, s, d, count, t0, -size); ++ + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { + verify_oop_array(size, d, count, t2); + } + } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); + __ mv(x10, zr); // return 0 + __ ret(); @@ -44627,7 +42380,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. 
+ // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44651,7 +42404,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_copy(bool aligned, address nooverlap_target, + address* entry, const char* name, @@ -44715,7 +42468,7 @@ index 000000000..c5b3b094c + address generate_disjoint_oop_copy(bool aligned, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); + } + @@ -44733,7 +42486,7 @@ index 000000000..c5b3b094c + address nooverlap_target, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, + name, dest_uninitialized); + } @@ -44807,7 +42560,7 @@ index 000000000..c5b3b094c + + __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Caller of this entry point must set up the argument registers ++ // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + BLOCK_COMMENT("Entry:"); @@ -44859,7 +42612,7 @@ index 000000000..c5b3b094c + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop + __ add(to, to, UseCompressedOops ? 
4 : 8); + __ sub(count, count, 1); + __ beqz(count, L_do_card_marks); @@ -45015,13 +42768,13 @@ index 000000000..c5b3b094c + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; ++ + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; -+ + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -45265,7 +43018,7 @@ index 000000000..c5b3b094c + } + + __ BIND(L_failed); -+ __ mv(x10, -1); ++ __ li(x10, -1); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + @@ -45456,6 +43209,9 @@ index 000000000..c5b3b094c + address entry_jlong_arraycopy = NULL; + address entry_checkcast_arraycopy = NULL; + ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); ++ + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + + //*** jbyte @@ -45598,42 +43354,42 @@ index 000000000..c5b3b094c + __ ret(); + } + -+#ifdef COMPILER2 + // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; + __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp1, tmp2); + __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF1); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp5, cnt1); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF2); + } + + // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { -+ const Register tmp = x30; ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; + __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize); ++ __ addi(strL, strL, 8); + __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ addi(strU, strU, 8); + __ inflate_lo32(tmp, tmpL); + __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); + __ inflate_hi32(tmp, tmpL); + __ mv(t0, tmp); + __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ bnez(tmp, DIFF1); + } + + // x10 = result @@ -45648,7 +43404,8 @@ index 000000000..c5b3b094c + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); + address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, + tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; + RegSet spilled_regs = RegSet::of(tmp4, tmp5); @@ -45659,9 +43416,16 @@ index 000000000..c5b3b094c + __ mv(isLU ? tmp1 : tmp2, tmp3); + __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); + __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. + __ push_reg(spilled_regs, sp); + ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } + __ xorr(tmp3, tmp1, tmp2); + __ mv(tmp5, tmp2); + __ bnez(tmp3, CALCULATE_DIFFERENCE); @@ -45671,71 +43435,47 @@ index 000000000..c5b3b094c + tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison + tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison + -+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL -+ __ beqz(cnt2, DONE); // no characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, -wordSize / 2); ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ __ beqz(cnt2, DONE); // no character left -+ __ sub(cnt2, cnt2, wordSize * 2); ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, wordSize * 2); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, wordSize * 2); -+ __ beqz(t0, DONE); -+ __ bind(TAIL); // 1..15 characters left -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. 
-+ Label LOAD_LAST, WORD_CMP; -+ __ addi(t0, cnt2, wordSize); -+ __ bgtz(t0, LOAD_LAST); -+ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ addi(cnt2, cnt2, wordSize); -+ __ beqz(cnt2, DONE); // no character left -+ __ bind(LOAD_LAST); // 1..7 characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ addi(t0, cnt2, wordSize / 2); -+ __ blez(t0, WORD_CMP); -+ __ slli(t0, t0, 1); // now in bytes -+ __ slli(t0, t0, LogBitsPerByte); -+ __ sll(tmpL, tmpL, t0); -+ __ sll(tmpU, tmpU, t0); -+ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison -+ __ bind(WORD_CMP); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, wordSize / 2); -+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left -+ __ j(DONE); // no character left -+ } else { -+ // Unaligned accesses. Load from non-byte aligned address. -+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string -+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string -+ // last 16 characters -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ j(DONE); -+ } -+ __ bind(DIFF); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); + __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. ++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); ++ + // Find the first different characters in the longwords and + // compute their difference. + __ bind(CALCULATE_DIFFERENCE); @@ -45778,49 +43518,35 @@ index 000000000..c5b3b094c + __ add(str1, str1, wordSize); + __ add(str2, str2, wordSize); + // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ push_reg(spilled_regs, sp); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); + compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ bgez(cnt2, SMALL_LOOP); + __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); + __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, isLL ? 
8 : 4); + __ blez(cnt2, CHECK_LAST); + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ bind(CHECK_LAST); + if (!isLL) { + __ add(cnt2, cnt2, cnt2); // now in bytes + } + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ __ ld(tmp5, Address(str1)); -+ __ ld(cnt1, Address(str2)); -+ __ neg(cnt2, cnt2); -+ __ slli(cnt2, cnt2, LogBitsPerByte); -+ __ sll(tmp5, tmp5, cnt2); -+ __ sll(cnt1, cnt1, cnt2); -+ } else { -+ // Unaligned access. Load from non-byte aligned address. -+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ } -+ ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); + __ xorr(tmp4, tmp5, cnt1); + __ beqz(tmp4, LENGTH_DIFF); + // Find the first different characters in the longwords and @@ -45889,7 +43615,7 @@ index 000000000..c5b3b094c + // parameters + Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; + // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; + // redefinitions + Register ch1 = x28, ch2 = x29; + RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); @@ -45910,9 +43636,13 @@ index 000000000..c5b3b094c + + // first is needle[0] + __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); + __ mul(first, first, mask1); -+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); + if (needle_isL != haystack_isL) { + __ mv(tmp, ch1); + } @@ -45920,7 +43650,7 @@ index 000000000..c5b3b094c + __ blez(haystack_len, L_SMALL); + + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + // xorr, sub, orr, notr, andr + // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] @@ -45957,7 +43687,7 @@ index 000000000..c5b3b094c + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + __ j(L_SMALL_PROCEED); + + __ align(OptoLoopAlignment); @@ -45965,42 +43695,42 @@ index 000000000..c5b3b094c + __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); + __ neg(haystack_len, haystack_len); + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + + __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. + __ notr(ch2, ch2); + __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check + __ beqz(match_mask, NOMATCH); + + __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ mv(ch2, wordSize / haystack_chr_size); + __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + + __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); -+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ beq(first, ch2, L_SMALL_CMP_LOOP); + + __ bind(L_SMALL_CMP_LOOP_NOMATCH); + __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ add(result, result, 1); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_SMALL_HAS_ZERO_LOOP); @@ -46012,14 +43742,14 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); + __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) + __ sub(result, result, 1); // array index from 0, so result -= 1 @@ -46029,26 +43759,26 @@ index 000000000..c5b3b094c + __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); + __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); + // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + + // compare one char + __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); + needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); // next char index ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index + __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); + __ beq(needle_len, ch2, L_CMP_LOOP); + + __ bind(L_CMP_LOOP_NOMATCH); + __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_HAS_ZERO_LOOP); + @@ -46059,7 +43789,7 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + __ j(DONE); @@ -46101,28 +43831,28 @@ index 000000000..c5b3b094c + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + ++#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; -+ const Register tmp1 = x28; -+ const Register tmp2 = x29; ++ const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp1, tmp2); ++ __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + -+ return start; ++ return entry; + } + + /** @@ -46132,7 +43862,7 @@ index 000000000..c5b3b094c + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address -+ * c_rarg3 - y lenth ++ * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ @@ -46140,7 +43870,7 @@ index 000000000..c5b3b094c + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46157,26 +43887,20 @@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(tmp1, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ push_reg(spilled_regs, sp); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + -+ return start; ++ return entry; + } + + address generate_squareToLen() + { -+ // squareToLen algorithm for sizes 1..127 described in java code works -+ // faster than multiply_to_len on some CPUs and slower on others, but -+ // multiply_to_len shows a bit better overall results + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46193,131 +43917,17 @@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(y, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ push_reg(spilled_regs, sp); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); + __ ret(); + -+ return start; -+ } -+#endif // COMPILER2 -+ -+ // Continuation point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. 
Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. -+ -+#undef __ -+#define __ masm-> -+ -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; -+ -+ const int insts_size = 512; -+ const int locs_size = 64; -+ -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); -+ -+ address start = __ pc(); -+ -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM -+ -+ __ enter(); // Save FP and RA before call -+ -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); -+ -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog -+ -+ int frame_complete = __ pc() - start; -+ -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); -+ -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); -+ -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); -+ -+ oop_maps->add_gc_map(the_pc - start, map); -+ -+ __ reset_last_Java_frame(true); -+ -+ __ leave(); -+ -+ // check for pending exceptions -+#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ -+ -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); ++ return entry; + } ++#endif + 
+#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { @@ -46534,10 +44144,12 @@ index 000000000..c5b3b094c + // Rj == i-len + + // Ra as temp register -+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); -+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); -+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); -+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; @@ -46566,7 +44178,8 @@ index 000000000..c5b3b094c + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register -+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; @@ -46593,14 +44206,15 @@ index 000000000..c5b3b094c + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); -+ mv(t0, 1); // set carry flag, i.e. no borrow ++ li(t0, 1); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); -+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); @@ -46624,7 +44238,8 @@ index 000000000..c5b3b094c + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ shadd(s, len, s, tmp1, LogBytesPerWord); ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); @@ -46673,7 +44288,6 @@ index 000000000..c5b3b094c + ld(Rn, Address(Pn)); + } + -+ + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + @@ -46704,7 +44318,7 @@ index 000000000..c5b3b094c + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); ++ // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); @@ -46757,7 +44371,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. -+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46783,7 +44397,7 @@ index 000000000..c5b3b094c + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); -+ mv(t0, -1); ++ li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); @@ -46820,8 +44434,8 @@ index 000000000..c5b3b094c + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; -+ slli(Rj, Rlen, 1); // Rj as temp register -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); @@ -46835,13 +44449,12 @@ index 000000000..c5b3b094c + + post2(Ri, Rlen); + addw(Ri, Ri, 1); -+ slli(Rj, Rlen, 1); -+ blt(Ri, Rj, loop); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + -+ + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra @@ -46880,7 +44493,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. 
-+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46938,8 +44551,8 @@ index 000000000..c5b3b094c + mv(Ri, Rlen); { + Label loop, end; + bind(loop); -+ slli(Rj, Rlen, 1); -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + pre2(Ri, Rlen); + @@ -46985,6 +44598,117 @@ index 000000000..c5b3b094c + }; +#endif // COMPILER2 + ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ const int insts_size = 512; ++ const int locs_size = 64; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ ++ __ enter(); // Save FP and RA before call ++ ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); ++ ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); ++ } ++ + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points @@ -47012,6 +44736,13 @@ index 000000000..c5b3b094c + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. 
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); + } + + void generate_all() { @@ -47050,10 +44781,6 @@ index 000000000..c5b3b094c + StubRoutines::_squareToLen = generate_squareToLen(); + } + -+ generate_compare_long_strings(); -+ -+ generate_string_indexof_stubs(); -+ + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); @@ -47065,14 +44792,11 @@ index 000000000..c5b3b094c + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } -+#endif // COMPILER2 -+ // Safefetch stubs. -+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++#endif ++ ++ generate_compare_long_strings(); ++ ++ generate_string_indexof_stubs(); + + StubRoutines::riscv::set_completed(); + } @@ -47094,12 +44818,12 @@ index 000000000..c5b3b094c +} diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp new file mode 100644 -index 000000000..633108b95 +index 0000000000..9202d9ec4b --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,57 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47134,7 +44858,6 @@ index 000000000..633108b95 +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + -+address StubRoutines::riscv::_get_previous_fp_entry = NULL; +address StubRoutines::riscv::_get_previous_sp_entry = NULL; + +address StubRoutines::riscv::_f2i_fixup = NULL; @@ -47146,8 +44869,6 @@ index 000000000..633108b95 +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_has_negatives = NULL; -+address StubRoutines::riscv::_has_negatives_long = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address StubRoutines::riscv::_compare_long_string_LU = NULL; @@ -47160,12 +44881,12 @@ index 000000000..633108b95 +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp new file mode 100644 -index 000000000..8aa81980e +index 0000000000..0c9445e18a --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,179 @@ +@@ -0,0 +1,155 @@ +/* -+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47197,20 +44918,19 @@ index 000000000..8aa81980e +// definition. See stubRoutines.hpp for a description on how to +// extend it. + -+static bool returns_to_call_stub(address return_pc) { ++static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) +}; + +class riscv { + friend class StubGenerator; + + private: -+ static address _get_previous_fp_entry; + static address _get_previous_sp_entry; + + static address _f2i_fixup; @@ -47225,8 +44945,6 @@ index 000000000..8aa81980e + + static address _zero_blocks; + -+ static address _has_negatives; -+ static address _has_negatives_long; + static address _compare_long_string_LL; + static address _compare_long_string_LU; + static address _compare_long_string_UL; @@ -47235,57 +44953,44 @@ index 000000000..8aa81980e + static address _string_indexof_linear_uu; + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; ++ + static bool _completed; + + public: + -+ static address get_previous_fp_entry() -+ { -+ return _get_previous_fp_entry; -+ } -+ -+ static address get_previous_sp_entry() -+ { ++ static address get_previous_sp_entry() { + return _get_previous_sp_entry; + } + -+ static address f2i_fixup() -+ { ++ static address f2i_fixup() { + return _f2i_fixup; + } + -+ static address f2l_fixup() -+ { ++ static address f2l_fixup() { + return _f2l_fixup; + } + -+ static address d2i_fixup() -+ { ++ static address d2i_fixup() { + return _d2i_fixup; + } + -+ static address d2l_fixup() -+ { ++ static address d2l_fixup() { + return _d2l_fixup; + } + -+ static address float_sign_mask() -+ { ++ static address float_sign_mask() { + return _float_sign_mask; + } + -+ static address float_sign_flip() -+ { ++ static address float_sign_flip() { + return _float_sign_flip; + } + -+ static address double_sign_mask() -+ { ++ static address double_sign_mask() { + return _double_sign_mask; + } + -+ static address double_sign_flip() -+ { ++ static address double_sign_flip() { + return _double_sign_flip; + } + @@ -47293,14 +44998,6 @@ index 000000000..8aa81980e + return _zero_blocks; + } + -+ static address has_negatives() { -+ return _has_negatives; -+ } -+ -+ static address has_negatives_long() { -+ return _has_negatives_long; -+ } -+ + static address compare_long_string_LL() { + return _compare_long_string_LL; + } @@ -47345,14 +45042,14 @@ index 000000000..8aa81980e +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 000000000..f5e212204 +index 0000000000..e639fa7e12 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1841 @@ +@@ -0,0 +1,1833 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. 
-+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -47377,7 +45074,6 @@ index 000000000..f5e212204 + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" -+#include "classfile/javaClasses.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/bytecodeTracer.hpp" @@ -47396,13 +45092,13 @@ index 000000000..f5e212204 +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" -+#include "utilities/macros.hpp" +#include + +#ifndef PRODUCT @@ -47453,7 +45149,7 @@ index 000000000..f5e212204 + // bcp (NULL) + // ... + -+ // Restore RA ++ // Restore ra + __ ld(ra, Address(sp, 0)); + __ addi(sp, sp , 2 * wordSize); + @@ -47892,7 +45588,7 @@ index 000000000..f5e212204 + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); -+ __ membar(MacroAssembler::AnyAny); ++ __ fence(0xf, 0xf); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} @@ -47908,9 +45604,10 @@ index 000000000..f5e212204 +// +// xmethod: method +// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. 
+ if (TieredCompilation) { @@ -47922,7 +45619,7 @@ index 000000000..f5e212204 + __ beqz(x10, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); ++ in_bytes(InvocationCounter::counter_offset())); + const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); + __ j(done); @@ -47930,19 +45627,19 @@ index 000000000..f5e212204 + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); + __ get_method_counters(xmethod, t1, done); + const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); + __ bind(done); + } else { // not TieredCompilation + const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); + const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); + + __ get_method_counters(xmethod, t1, done); + @@ -47978,7 +45675,7 @@ index 000000000..f5e212204 + __ ld(t1, Address(xmethod, Method::method_counters_offset())); + __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); + __ bltu(x10, t1, done); -+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here ++ __ j(*overflow); + } + __ bind(done); + } @@ -48168,16 +45865,19 @@ index 000000000..f5e212204 +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC) { + __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(x28, Address(sp, 4 * wordSize)); + } else +#endif + { -+ __ load_mirror(t0, xmethod); -+ __ sd(t0, Address(sp, 4 * wordSize)); ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); + } -+ __ sd(zr, Address(sp, 5 * wordSize)); + -+ __ load_constant_pool_cache(xcpool, xmethod); ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); + __ sd(xcpool, Address(sp, 3 * wordSize)); + __ sd(xlocals, Address(sp, 2 * wordSize)); + @@ -48192,7 +45892,8 @@ index 000000000..f5e212204 + + // Move SP out of the way + if (!native_call) { -+ __ load_max_stack(t0, xmethod); ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ slli(t0, t0, 3); + __ sub(t0, sp, t0); @@ -48237,7 +45938,7 @@ index 000000000..f5e212204 + // xmethod: Method* + // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // RA is live. It must be saved around calls. ++ // ra is live. It must be saved around calls. + + address entry = __ pc(); + @@ -48303,14 +46004,12 @@ index 000000000..f5e212204 + // Bang each page in the shadow zone. 
We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. -+ if (UseStackBanging) { -+ const int n_shadow_pages = checked_cast(JavaThread::stack_shadow_zone_size()) / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; -+ const int page_size = os::vm_page_size(); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t1, sp, pages * page_size); -+ __ sd(zr, Address(t1)); -+ } ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; ++ const int page_size = os::vm_page_size(); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } +} + @@ -48319,7 +46018,7 @@ index 000000000..f5e212204 +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // x11: Method* + // x30: sender sp @@ -48535,18 +46234,8 @@ index 000000000..f5e212204 + __ mv(t0, _thread_in_native_trans); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ if (os::is_MP()) { -+ if (UseMembar) { -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, t0, t1); -+ } -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests + { @@ -48592,7 +46281,7 @@ index 000000000..f5e212204 + // and result handler will pick it up + + { -+ Label no_oop, not_weak, store_result; ++ Label no_oop; + __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ bne(t, result_handler, no_oop); + // Unbox oop result, e.g. JNIHandles::resolve result. 
@@ -48607,14 +46296,14 @@ index 000000000..f5e212204 + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + -+ __ push_call_clobbered_registers(); ++ __ pusha(); // only save smashed registers + __ mv(c_rarg0, xthread); + __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ jalr(t1); -+ __ pop_call_clobbered_registers(); ++ __ popa(); // only restore smashed registers + __ bind(no_reguard); + } + @@ -48850,7 +46539,7 @@ index 000000000..f5e212204 + __ set_method_data_pointer_for_bcp(); + // don't think we need this + __ get_method(x11); -+ __ jal(profile_method_continue); ++ __ j(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); @@ -49014,7 +46703,7 @@ index 000000000..f5e212204 + Label L_done; + + __ lbu(t0, Address(xbcp, 0)); -+ __ mv(t1, Bytecodes::_invokestatic); ++ __ li(t1, Bytecodes::_invokestatic); + __ bne(t1, t0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. @@ -49060,7 +46749,7 @@ index 000000000..f5e212204 + // ra: return address/pc that threw exception + // sp: expression stack of caller + // fp: fp of caller -+ // FIXME: There's no point saving RA here because VM calls don't trash it ++ // FIXME: There's no point saving ra here because VM calls don't trash it + __ sub(sp, sp, 2 * wordSize); + __ sd(x10, Address(sp, 0)); // save exception + __ sd(ra, Address(sp, wordSize)); // save return address @@ -49157,7 +46846,7 @@ index 000000000..f5e212204 + __ push_reg(t0); + __ push_reg(x10); + __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ amoadd_d(zr, x10, t0, Assembler::aqrl); + __ pop_reg(x10); + __ pop_reg(t0); @@ -49192,14 +46881,14 @@ index 000000000..f5e212204 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 000000000..8e6e7dee5 +index 0000000000..84b1afc7dc --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,4028 @@ +@@ -0,0 +1,4006 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -49225,6 +46914,7 @@ index 000000000..8e6e7dee5 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" @@ -49234,6 +46924,7 @@ index 000000000..8e6e7dee5 +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" @@ -49245,7 +46936,7 @@ index 000000000..8e6e7dee5 +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { -+ // No riscv specific initialization ++ // No RISC-V specific initialization +} + +// Address computation: local variables @@ -49270,12 +46961,15 @@ index 000000000..8e6e7dee5 + return iaddress(n); +} + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, 0); +} + -+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address laddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, Interpreter::local_offset_in_bytes(1));; +} @@ -49284,7 +46978,8 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + -+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address daddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { + return laddress(r, temp, _masm); +} + @@ -49292,6 +46987,10 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} ++ +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. 
+static inline Address at_tos () { @@ -49326,13 +47025,15 @@ index 000000000..8e6e7dee5 + Register val, + DecoratorSet decorators) { + assert(val == noreg || val == x10, "parameter is just for looks"); -+ __ store_heap_oop(dst, val, x29, x11, x13, decorators); ++ assert_cond(_masm != NULL); ++ __ store_heap_oop(dst, val, x29, x11, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators) { ++ assert_cond(_masm != NULL); + __ load_heap_oop(dst, src, x7, x11, decorators); +} + @@ -49426,13 +47127,13 @@ index 000000000..8e6e7dee5 +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::lconst(int value) +{ + transition(vtos, ltos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::fconst(int value) @@ -49641,7 +47342,6 @@ index 000000000..8e6e7dee5 + __ bind(notLong); + condy_helper(Done); + __ bind(Done); -+ +} + +void TemplateTable::condy_helper(Label& Done) @@ -49937,8 +47637,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); + __ addw(x10, x10, zr); // signed extended +} + @@ -49951,8 +47651,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::faload() @@ -49964,8 +47664,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::daload() @@ -49977,8 +47677,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::aaload() @@ -49990,9 +47690,9 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + do_oop_load(_masm, -+ Address(t0), ++ Address(x10), + x10, + IS_ARRAY); +} @@ -50006,8 +47706,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(t0, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, 
noreg); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::caload() @@ -50019,8 +47719,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +// iload followed by caload frequent pair @@ -50036,8 +47736,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::saload() @@ -50049,8 +47749,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::iload(int n) @@ -50237,7 +47937,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::lastore() { @@ -50250,7 +47950,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::fastore() { @@ -50263,7 +47963,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); +} + +void TemplateTable::dastore() { @@ -50276,7 +47976,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); +} + +void TemplateTable::aastore() { @@ -50357,7 +48057,7 @@ index 000000000..8e6e7dee5 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + + __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, 
noreg); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + +void TemplateTable::castore() @@ -50371,7 +48071,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::sastore() @@ -50982,9 +48682,9 @@ index 000000000..8e6e7dee5 + } else { // not TieredCompilation + // increment counter + __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter + __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter ++ __ sw(t0, Address(t1, be_offset)); // store counter + + __ lwu(x10, Address(t1, inv_offset)); // load invocation counter + __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits @@ -51025,7 +48725,6 @@ index 000000000..8e6e7dee5 + } + __ bind(dispatch); + } -+ + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + @@ -51092,7 +48791,6 @@ index 000000000..8e6e7dee5 + __ jr(t0); + } + } -+ +} + +void TemplateTable::if_0cmp(Condition cc) @@ -51188,9 +48886,9 @@ index 000000000..8e6e7dee5 + __ pop_ptr(x11); + + if (cc == equal) { -+ __ oop_nequal(x11, x10, not_taken); ++ __ bne(x11, x10, not_taken); + } else if (cc == not_equal) { -+ __ oop_equal(x11, x10, not_taken); ++ __ beq(x11, x10, not_taken); + } + branch(false, false); + __ bind(not_taken); @@ -51648,14 +49346,6 @@ index 000000000..8e6e7dee5 + pop_and_check_object(obj); + } + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + __ add(off, obj, off); + const Address field(off); + @@ -51918,7 +49608,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); // off register as temparator register. -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); + } @@ -51938,7 +49628,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); + } @@ -51979,7 +49669,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); + } @@ -51999,7 +49689,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); + } @@ -52019,7 +49709,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); + } @@ -52039,7 +49729,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); + } @@ -52059,7 +49749,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); + } @@ -52081,7 +49771,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); + } @@ -52218,28 +49908,28 @@ index 000000000..8e6e7dee5 + do_oop_store(_masm, field, x10, IN_HEAP); + break; + case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52298,14 +49988,6 @@ index 000000000..8e6e7dee5 + __ add(x11, x10, x11); + const Address field(x11, 0); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: @@ -52357,16 +50039,6 @@ index 000000000..8e6e7dee5 + __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi(xbcp, xbcp, 1); @@ -52383,8 +50055,8 @@ index 000000000..8e6e7dee5 + __ verify_oop(x10); + break; + case ftos: -+ __ add(t0, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52406,11 +50078,6 @@ index 000000000..8e6e7dee5 +//----------------------------------------------------------------------------- +// Calls + -+void 
TemplateTable::count_calls(Register method, Register temp) -+{ -+ __ call_Unimplemented(); -+} -+ +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. @@ -52432,8 +50099,8 @@ index 000000000..8e6e7dee5 + assert(recv == noreg || recv == x12, ""); + + // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; ++ if (recv == noreg) { ++ recv = x12; + } + if (flags == noreg) { + flags = x13; @@ -52495,7 +50162,7 @@ index 000000000..8e6e7dee5 + __ beqz(t0, notFinal); + + const Register method = index; // method must be xmethod -+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* @@ -52518,7 +50185,7 @@ index 000000000..8e6e7dee5 + // profile this call + __ profile_virtual_call(x10, xlocals, x13); + -+ // get target methodOop & entry point ++ // get target Method & entry point + __ lookup_virtual_method(x10, index, method); + __ profile_arguments_type(x13, method, x14, true); + __ jump_from_interpreted(method); @@ -52654,7 +50321,7 @@ index 000000000..8e6e7dee5 + xmethod, x30, + no_such_interface); + -+ // xmethod: methodOop to call ++ // xmethod: Method to call + // x12: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error @@ -52666,7 +50333,7 @@ index 000000000..8e6e7dee5 + + // do the call + // x12: receiver -+ // xmethod,: methodOop ++ // xmethod: Method + __ jump_from_interpreted(xmethod); + __ should_not_reach_here(); + @@ -52828,7 +50495,7 @@ index 000000000..8e6e7dee5 + __ bnez(x13, loop); + } + -+ // initialize object hader only. ++ // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(t0, Address(x14, Klass::prototype_header_offset())); @@ -52988,7 +50655,7 @@ index 000000000..8e6e7dee5 + __ j(done); + // Come here on success + __ bind(ok_is_subtype); -+ __ mv(x10, 1); ++ __ li(x10, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { @@ -53226,12 +50893,12 @@ index 000000000..8e6e7dee5 +} diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 000000000..b437c8f4c +index 0000000000..fcc86108d2 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -53274,13 +50941,12 @@ index 000000000..b437c8f4c +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 000000000..03079aec0 +index 0000000000..6c89133de0 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53323,10 +50989,10 @@ index 000000000..03079aec0 +#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp new file mode 100644 -index 000000000..dd4f5c9ae +index 0000000000..6bdce51506 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,91 @@ +@@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -53352,6 +51018,7 @@ index 000000000..dd4f5c9ae + * + */ + ++#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" @@ -53371,11 +51038,6 @@ index 000000000..dd4f5c9ae + return; + } + -+ int core_id = -1; -+ int chip_id = -1; -+ int len = 0; -+ char* src_string = NULL; -+ + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; @@ -53420,13 +51082,13 @@ index 000000000..dd4f5c9ae +} diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp new file mode 100644 -index 000000000..0982b6668 +index 0000000000..711e4aeaf6 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -53481,12 +51143,12 @@ index 000000000..0982b6668 +#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 000000000..31d5bb5f4 +index 0000000000..0e8f526bd9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,190 @@ +@@ -0,0 +1,209 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53512,21 +51174,31 @@ index 000000000..31d5bb5f4 + +#include "precompiled.hpp" +#include "runtime/java.hpp" ++#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" +#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + +#include OS_HEADER_INLINE(os) + +const char* VM_Version::_uarch = ""; ++const char* VM_Version::_vm_mode = ""; +uint32_t VM_Version::_initial_vector_length = 0; + +void VM_Version::initialize() { + get_os_cpu_info(); + ++ // check if satp.mode is supported, currently supports up to SV48(RV64) ++ if (get_satp_mode() > VM_SV48) { ++ vm_exit_during_initialization( ++ err_msg("Unsupported satp mode: %s. 
Only satp modes up to sv48 are supported for now.", ++ _vm_mode)); ++ } ++ + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } ++ + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + } @@ -53568,12 +51240,12 @@ index 000000000..31d5bb5f4 + } + + if (UseCRC32Intrinsics) { -+ warning("CRC32Intrinsics instructions are not available on this CPU."); ++ warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseCRC32CIntrinsics) { -+ warning("CRC32CIntrinsics instructions are not available on this CPU."); ++ warning("CRC32C intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + @@ -53587,6 +51259,11 @@ index 000000000..31d5bb5f4 + } + } + ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); ++ } ++ + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } @@ -53610,16 +51287,20 @@ index 000000000..31d5bb5f4 + _features_string = os::strdup(buf); + +#ifdef COMPILER2 -+ initialize_c2(); ++ c2_initialize(); +#endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); +} + +#ifdef COMPILER2 -+void VM_Version::initialize_c2() { -+ // lack of cmove in riscv ++void VM_Version::c2_initialize() { + if (UseCMoveUnconditionally) { + FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); + } ++ + if (ConditionalMoveLimit > 0) { + FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } @@ -53658,14 +51339,6 @@ index 000000000..31d5bb5f4 + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); -+ } -+ -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); -+ } -+ + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } @@ -53673,16 +51346,25 @@ index 000000000..31d5bb5f4 + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ } +} +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 000000000..0178e6d75 +index 0000000000..875511f522 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,80 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -53710,10 +51392,33 @@ index 000000000..0178e6d75 +#define CPU_RISCV_VM_VERSION_RISCV_HPP + +#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 ++ ++// VM modes (satp.mode) privileged ISA 1.10 ++enum VM_MODE { ++ VM_MBARE = 0, ++ VM_SV39 = 8, ++ VM_SV48 = 9, ++ VM_SV57 = 10, ++ VM_SV64 = 11 ++}; ++ ++protected: ++ static const char* _uarch; ++ static const char* _vm_mode; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); ++ static VM_MODE get_satp_mode(); ++ +public: + // Initialization + static void initialize(); @@ -53733,29 +51438,19 @@ index 000000000..0178e6d75 +#undef DECLARE_CPU_FEATURE_FLAG + }; + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); -+ -+#ifdef COMPILER2 -+private: -+ static void initialize_c2(); -+#endif // COMPILER2 ++ static void initialize_cpu_information(void); +}; + +#endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 000000000..6572d9334 +index 0000000000..c4338715f9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53783,10 +51478,10 @@ index 000000000..6572d9334 +#include "code/vmreg.hpp" + +void VMRegImpl::set_regName() { -+ Register reg = ::as_Register(0); + int i = 0; ++ Register reg = ::as_Register(0); + for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { + regName[i++] = reg->name(); + } + reg = reg->successor(); @@ -53794,34 +51489,25 @@ index 000000000..6572d9334 + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { + regName[i++] = freg->name(); + } + freg = freg->successor(); + } + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = vreg->name(); -+ } -+ vreg = vreg->successor(); -+ } -+ -+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { ++ regName[i] = "NON-GPR-FPR"; + } +} diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 000000000..ec76a1db1 +index 0000000000..6f613a8f11 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,64 @@ +@@ -0,0 +1,53 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53855,27 +51541,17 @@ index 000000000..ec76a1db1 + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} -+ +inline Register as_Register() { -+ assert( is_Register(), "must be"); ++ assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ assert(is_FloatRegister() && is_even(value()), "must be"); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / + FloatRegisterImpl::max_slots_per_register); +} + -+inline VectorRegister as_VectorRegister() { -+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} -+ +inline bool is_concrete() { + assert(is_reg(), "must be"); + return is_even(value()); @@ -53884,14 +51560,13 @@ index 000000000..ec76a1db1 +#endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 000000000..9605e59f4 +index 0000000000..06b70020b4 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,47 @@ +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -53917,19 +51592,19 @@ index 000000000..9605e59f4 +#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+inline VMReg RegisterImpl::as_VMReg() { -+ if( this == noreg ) { ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + -+inline VMReg FloatRegisterImpl::as_VMReg() { ++inline VMReg FloatRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + -+inline VMReg VectorRegisterImpl::as_VMReg() { ++inline VMReg VectorRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_fpr); +} @@ -53937,12 +51612,12 @@ index 000000000..9605e59f4 +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 000000000..b2aa87ab8 +index 0000000000..0d205240a5 --- /dev/null +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ +/* -+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -54015,7 +51690,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(t2)); ++ __ add_memory_int64(Address(t2), 1); + } +#endif + @@ -54106,7 +51781,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(x18)); ++ __ add_memory_int64(Address(x18), 1); + } +#endif + @@ -54114,7 +51789,7 @@ index 000000000..b2aa87ab8 + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // Entry arguments: -+ // t2: CompiledICHolder ++ // t1: CompiledICHolder + // j_rarg0: Receiver + + // This stub is called from compiled code which has no callee-saved registers, @@ -54197,380 +51872,24 @@ index 000000000..b2aa87ab8 +} + +int VtableStub::pd_code_alignment() { -+ // riscv cache line size is 64 bytes, but we want to limit alignment loss. ++ // RISCV cache line size is not an architected constant. We just align on word size. + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209..3b836fe6b 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); -+ - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index e335f473d..53ad912cb 100644 ---- a/src/hotspot/cpu/s390/s390.ad -+++ b/src/hotspot/cpu/s390/s390.ad -@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { - // BUT: make sure match rule is not disabled by a false predicate! - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - int Matcher::regnum_to_fpu_offset(int regnum) { -@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { - return Node::NotAMachineReg; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // z/Architecture does support misaligned store/load at minimal extra cost. - const bool Matcher::misaligned_vectors_ok() { - return true; -diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index 7a2798a51..7d9b17b44 100644 ---- a/src/hotspot/cpu/sparc/sparc.ad -+++ b/src/hotspot/cpu/sparc/sparc.ad -@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // SPARC doesn't support misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return false; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4..d38c63600 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); -+ - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -index 82fd8522b..8016d328a 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register - bind(DONE_LABEL); - } // string_indexof_char - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { -+ ShortBranchVerifier sbv(this); -+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); -+ -+ int stride = 16; -+ -+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, -+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, -+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, -+ FOUND_SEQ_CHAR, DONE_LABEL; -+ -+ movptr(result, str1); -+ if (UseAVX >= 2) { -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); -+ cmpl(cnt1, stride*2); -+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); -+ vpxor(vec2, vec2); -+ movl(tmp, cnt1); -+ 
andl(tmp, 0xFFFFFFE0); //vector count (in chars) -+ andl(cnt1,0x0000001F); //tail count (in chars) -+ -+ bind(SCAN_TO_32_CHAR_LOOP); -+ vmovdqu(vec3, Address(result, 0)); -+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); -+ vptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 32); -+ subl(tmp, stride*2); -+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); -+ jmp(SCAN_TO_16_CHAR); -+ -+ bind(SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ -+ bind(SCAN_TO_16_CHAR); -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left -+ if (UseAVX < 2) { -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) -+ andl(cnt1,0x0000000F); //tail count (in bytes) -+ -+ bind(SCAN_TO_16_CHAR_LOOP); -+ movdqu(vec3, Address(result, 0)); -+ pcmpeqb(vec3, vec1); -+ ptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 16); -+ subl(tmp, stride); -+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... -+ -+ bind(SCAN_TO_CHAR_INIT); -+ testl(cnt1, cnt1); -+ jcc(Assembler::zero, RET_NOT_FOUND); -+ bind(SCAN_TO_CHAR_LOOP); -+ load_unsigned_byte(tmp, Address(result, 0)); -+ cmpl(ch, tmp); -+ jccb(Assembler::equal, FOUND_SEQ_CHAR); -+ addptr(result, 1); -+ subl(cnt1, 1); -+ jccb(Assembler::zero, RET_NOT_FOUND); -+ jmp(SCAN_TO_CHAR_LOOP); -+ -+ bind(RET_NOT_FOUND); -+ movl(result, -1); -+ jmpb(DONE_LABEL); -+ -+ bind(FOUND_CHAR); -+ if (UseAVX >= 2) { -+ vpmovmskb(tmp, vec3); -+ } else { -+ pmovmskb(tmp, vec3); -+ } -+ bsfl(ch, tmp); -+ addptr(result, ch); -+ -+ bind(FOUND_SEQ_CHAR); -+ subptr(result, str1); -+ -+ bind(DONE_LABEL); -+} // stringL_indexof_char -+ - // helper function for string_compare - void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, - Address::ScaleFactor scale, Address::ScaleFactor scale1, -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -index 1bed0cce9..47a062c11 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -@@ -1659,6 +1659,8 @@ public: - #ifdef COMPILER2 - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, - XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); -+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. -diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index baa7cc774..238d8729b 100644 ---- a/src/hotspot/cpu/x86/x86.ad -+++ b/src/hotspot/cpu/x86/x86.ad -@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { - return ret_value; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); -+ if (!vector_size_supported(bt, vlen)) { -+ ret_value = false; -+ } - if (ret_value) { - switch (opcode) { - case Op_AbsVB: -@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { - return MIN2(size,max_size); - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad -index bc9947327..bbe49bd62 100644 ---- a/src/hotspot/cpu/x86/x86_32.ad -+++ b/src/hotspot/cpu/x86/x86_32.ad -@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast array equals - instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 7e6739ffe..53f887ea6 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -2975,7 +2975,7 @@ frame - RAX_H_num // Op_RegL - }; - // Excluded flags and vector registers. 
-- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); -+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); - return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); - %} - %} -@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) -+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) - %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) -+%{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast string equals - instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, - legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e..6c79d20a4 100644 +index 2842a11f92..208a374eea 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp -@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { - {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, - {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, - {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, -- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, -+#ifdef _LP64 -+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, -+#else -+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, -+#endif - {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, - }; - -@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { +@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); +#elif defined(RISCV) -+ 
strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); ++ strncpy(cpuinfo, "RISCV64", length); #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) -@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -54580,14 +51899,46 @@ index 74945999e..6c79d20a4 100644 #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +new file mode 100644 +index 0000000000..f2610af6cd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// nothing required here diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 000000000..961fff011 +index 0000000000..4a1ebee8b0 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,113 @@ +@@ -0,0 +1,189 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54614,46 +51965,121 @@ index 000000000..961fff011 +#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class atomic ++ +// Note that memory_order_conservative requires a full barrier after atomic stores. 
+// See https://patchwork.kernel.org/patch/3575821/ + ++#if defined(__clang_major__) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2)) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#endif ++ +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + +template +struct Atomic::PlatformAdd -+ : public Atomic::AddAndFetch > ++ : Atomic::FetchAndAdd > +{ + template + D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add add and fetch for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } ++ ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; ++ } +}; + ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3))); ++ int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24 ++ ++ uint64_t mask = 0xfful << shift; // 0x00000000..FF.. ++ uint64_t remask = ~mask; // 0xFFFFFFFF..00.. ++ ++ uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC.. ++ uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE.. ++ ++ uint64_t old_value; ++ uint64_t rc_temp; ++ ++ __asm__ __volatile__ ( ++ "1: lr.w %0, %2 \n\t" ++ " and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX.. ++ " bne %1, %3, 2f \n\t" // compare 64-bit w_cv ++ " and %1, %0, %6 \n\t" // remove old byte ++ " or %1, %1, %4 \n\t" // add new byte ++ " sc.w %1, %1, %2 \n\t" // store new word ++ " bnez %1, 1b \n\t" ++ "2: \n\t" ++ : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst) ++ : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask) ++ : "memory" ); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ return (T)((old_value & mask) >> shift); ++} ++#endif ++ +template +template +inline T Atomic::PlatformXchg::operator()(T exchange_value, + T volatile* dest, + atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add xchg for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; +} + -+// No direct support for cmpxchg of bytes; emulate using int. ++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. 
+template +template +inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest, ++ T volatile* dest __attribute__((unused)), + T compare_value, + atomic_memory_order order) const { ++ ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; + if (order != memory_order_relaxed) { @@ -54672,7 +52098,7 @@ index 000000000..961fff011 +template<> +template +inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest, ++ T volatile* dest __attribute__((unused)), + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); @@ -54698,15 +52124,16 @@ index 000000000..961fff011 + return rv; +} + ++#undef FULL_COMPILER_ATOMIC_SUPPORT +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 000000000..44f04d1a9 +index 0000000000..28868c7640 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54727,20 +52154,21 @@ index 000000000..44f04d1a9 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { ++inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + -+inline u4 Bytes::swap_u4(u4 x) { ++inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + @@ -54748,16 +52176,16 @@ index 000000000..44f04d1a9 + return bswap_64(x); +} + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp new file mode 100644 -index 000000000..645b40a7c +index 0000000000..bdf36d6b4c --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,116 @@ +@@ -0,0 +1,124 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -54787,7 +52215,7 @@ index 000000000..645b40a7c + (void)memmove(to, from, count * HeapWordSize); +} + -+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; // fall through + case 7: to[6] = from[6]; // fall through @@ -54799,20 +52227,28 @@ index 000000000..645b40a7c + case 1: to[0] = from[0]; // fall through + case 0: break; + default: -+ if(is_atomic) { -+ while (count-- > 0) { *to++ = *from++; } -+ } else { -+ memcpy(to, from, count * HeapWordSize); -+ } ++ memcpy(to, from, count * HeapWordSize); ++ break; + } +} + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, false); -+} -+ +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, true); ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { @@ -54873,12 +52309,12 @@ index 000000000..645b40a7c +#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 000000000..041cdf4ff +index 0000000000..297414bfcd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54922,12 +52358,12 @@ index 000000000..041cdf4ff +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp new file mode 100644 -index 000000000..842aa51e0 +index 0000000000..5b5d35553f --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,73 @@ +@@ -0,0 +1,74 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54956,7 +52392,7 @@ index 000000000..842aa51e0 + +// Included in orderAccess.hpp header file. + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class OrderAccess. 
+ @@ -54977,37 +52413,38 @@ index 000000000..842aa51e0 + FULL_MEM_BARRIER; +} + ++ +template +struct OrderAccess::PlatformOrderedLoad +{ + template -+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } +}; + +template +struct OrderAccess::PlatformOrderedStore +{ + template -+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } +}; + +template +struct OrderAccess::PlatformOrderedStore +{ + template -+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } +}; + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 000000000..37947701b +index 0000000000..8b772892b4 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,628 @@ +@@ -0,0 +1,624 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55032,8 +52469,6 @@ index 000000000..37947701b + +// no precompiled headers +#include "asm/macroAssembler.hpp" -+#include "classfile/classLoader.hpp" -+#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/codeCache.hpp" +#include "code/icBuffer.hpp" @@ -55053,6 +52488,7 @@ index 000000000..37947701b +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" @@ -55063,6 +52499,7 @@ index 000000000..37947701b + +// put OS-includes here +# include ++# include +# include +# include +# include @@ -55115,9 +52552,7 @@ index 000000000..37947701b +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). +ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, -+ intptr_t** ret_sp, -+ intptr_t** ret_fp) { ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); @@ -55127,9 +52562,9 @@ index 000000000..37947701b +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { ++ intptr_t** ret_sp, intptr_t** ret_fp) { + -+ ExtendedPC epc; ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { @@ -55185,11 +52620,11 @@ index 000000000..37947701b + // In compiled code, the stack banging is performed before RA + // has been saved in the frame. RA is live, and SP and FP + // belong to the caller. 
-+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - -+ NativeInstruction::instruction_size); -+ *fr = frame(frame_sp, frame_fp, frame_pc); ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + assert(!fr->is_first_frame(), "Safety check"); @@ -55209,7 +52644,7 @@ index 000000000..37947701b + +NOINLINE frame os::current_frame() { + intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if(sender_sp != NULL) { ++ if (sender_sp != NULL) { + frame myframe((intptr_t*)os::current_stack_pointer(), + sender_sp[frame::link_offset], + CAST_FROM_FN_PTR(address, os::current_frame)); @@ -55269,19 +52704,20 @@ index 000000000..37947701b + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ) { ++ if (t != NULL ){ + if(t->is_Java_thread()) { -+ thread = (JavaThread*)t; -+ } else if(t->is_VM_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ + vmthread = (VMThread *)t; + } + } + } + + // Handle SafeFetch faults -+ if (uc != NULL) { ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (StubRoutines::is_safefetch_fault(pc)) { ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } @@ -55290,7 +52726,7 @@ index 000000000..37947701b + // decide if this trap can be handled by a stub + address stub = NULL; + -+ address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { @@ -55385,7 +52821,7 @@ index 000000000..37947701b + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, @@ -55393,7 +52829,7 @@ index 000000000..37947701b + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } @@ -55407,9 +52843,9 @@ index 000000000..37947701b + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. 
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } + @@ -55427,9 +52863,7 @@ index 000000000..37947701b + + if (stub != NULL) { + // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + + os::Linux::ucontext_set_pc(uc, stub); + return true; @@ -55471,7 +52905,6 @@ index 000000000..37947701b +void os::Linux::set_fpu_control_word(int fpu_control) { +} + -+ +//////////////////////////////////////////////////////////////////////////////// +// thread stack + @@ -55635,13 +53068,13 @@ index 000000000..37947701b +}; diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 000000000..eae1635b0 +index 0000000000..f3e3a73bc5 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp @@ -0,0 +1,40 @@ +/* -+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55681,12 +53114,12 @@ index 000000000..eae1635b0 +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp new file mode 100644 -index 000000000..82b9bb6fd +index 0000000000..2bd48e09c3 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55723,14 +53156,142 @@ index 000000000..82b9bb6fd +} + +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +new file mode 100644 +index 0000000000..ffcd819487 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "logging/log.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/debug.hpp" ++ ++#include ++#include ++ ++#define check_with_errno(check_type, cond, msg) \ ++ do { \ ++ int err = errno; \ ++ check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err), \ ++ os::errno_name(err)); \ ++} while (false) ++ ++#define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) ++#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) ++ ++#ifndef NR_riscv_flush_icache ++#ifndef NR_arch_specific_syscall ++#define NR_arch_specific_syscall 244 ++#endif ++#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15) ++#endif ++ ++#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL ++#define SYS_RISCV_FLUSH_ICACHE_ALL 0UL ++ ++static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) { ++ return syscall(NR_riscv_flush_icache, start, end, flags); ++} ++ ++bool RiscvFlushIcache::test() { ++ ATTRIBUTE_ALIGNED(64) char memory[64]; ++ long ret = sys_flush_icache((uintptr_t)&memory[0], ++ (uintptr_t)&memory[sizeof(memory) - 1], ++ SYS_RISCV_FLUSH_ICACHE_ALL); ++ if (ret == 0) { ++ return true; ++ } ++ int err = errno; \ ++ log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)", ++ os::strerror(err), os::errno_name(err)); ++ return false; ++} ++ ++void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) { ++ long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL); ++ guarantee_with_errno(ret == 0, "riscv_flush_icache failed"); ++} +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +new file mode 100644 +index 0000000000..f4e7263b39 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++ ++#include "memory/allocation.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/growableArray.hpp" ++ ++class RiscvFlushIcache: public AllStatic { ++ public: ++ static bool test(); ++ static void flush(uintptr_t start, uintptr_t end); ++}; ++ ++#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 000000000..c78096931 +index 0000000000..ccceed643e --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,103 @@ +@@ -0,0 +1,100 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55778,13 +53339,10 @@ index 000000000..c78096931 +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ assert(this->is_Java_thread(), "must be JavaThread"); -+ JavaThread* jt = (JavaThread *)this; -+ + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. -+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { -+ *fr_addr = jt->pd_last_frame(); ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); + return true; + } + @@ -55810,10 +53368,10 @@ index 000000000..c78096931 + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); -+ if (!ret_frame.safe_for_sender(jt)) { ++ if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 + frame ret_frame2(ret_sp, NULL, addr.pc()); -+ if (!ret_frame2.safe_for_sender(jt)) { ++ if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + } @@ -55834,12 +53392,12 @@ index 000000000..c78096931 +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp new file mode 100644 -index 000000000..657b98984 +index 0000000000..4b91fa855a --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp @@ -0,0 +1,67 @@ +/* -+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55876,7 +53434,7 @@ index 000000000..657b98984 + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } @@ -55897,7 +53455,7 @@ index 000000000..657b98984 + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+ ++public: + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). 
+ static bool register_stack_overflow() { return false; } @@ -55907,12 +53465,12 @@ index 000000000..657b98984 +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 000000000..8ee443b5d +index 0000000000..6cf7683a58 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55968,13 +53526,13 @@ index 000000000..8ee443b5d +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 000000000..ef9358aa0 +index 0000000000..8bcc949fed --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,116 @@ +@@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56049,6 +53607,20 @@ index 000000000..ef9358aa0 + return (uint32_t)read_csr(CSR_VLENB); +} + ++VM_Version::VM_MODE VM_Version::get_satp_mode() { ++ if (!strcmp(_vm_mode, "sv39")) { ++ return VM_SV39; ++ } else if (!strcmp(_vm_mode, "sv48")) { ++ return VM_SV48; ++ } else if (!strcmp(_vm_mode, "sv57")) { ++ return VM_SV57; ++ } else if (!strcmp(_vm_mode, "sv64")) { ++ return VM_SV64; ++ } else { ++ return VM_MBARE; ++ } ++} ++ +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); @@ -56061,11 +53633,30 @@ index 000000000..ef9358aa0 + STATIC_ASSERT(CPU_C == HWCAP_ISA_C); + STATIC_ASSERT(CPU_V == HWCAP_ISA_V); + ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V); ++ + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) { ++ if (_vm_mode[0] != '\0') { ++ continue; ++ } ++ char* vm_mode = os::strdup(p + 2); ++ vm_mode[strcspn(vm_mode, "\n")] = '\0'; ++ _vm_mode = vm_mode; ++ } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; @@ -56075,105 +53666,71 @@ index 000000000..ef9358aa0 + } + fclose(f); + } -+ -+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. -+ // Availability for those extensions could not be queried from HWCAP. -+ // TODO: Add proper detection for those extensions. 
-+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V); +} -diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp -index ba61aa4c0..4ca0b050b 100644 ---- a/src/hotspot/share/adlc/archDesc.cpp -+++ b/src/hotspot/share/adlc/archDesc.cpp -@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - // Match Vector types. - if (strncmp(idealOp, "Vec",3)==0) { - switch(last_char) { -+ case 'A': return "TypeVect::VECTA"; - case 'S': return "TypeVect::VECTS"; - case 'D': return "TypeVect::VECTD"; - case 'X': return "TypeVect::VECTX"; -@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - } - } - -+ if (strncmp(idealOp, "RegVMask", 8) == 0) { -+ return "Type::BOTTOM"; -+ } -+ - // !!!!! - switch(last_char) { - case 'I': return "TypeInt::INT"; -diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index f810fde76..2cf9636d1 100644 ---- a/src/hotspot/share/adlc/formssel.cpp -+++ b/src/hotspot/share/adlc/formssel.cpp -@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { - strcmp(opType,"RegL")==0 || - strcmp(opType,"RegF")==0 || - strcmp(opType,"RegD")==0 || -+ strcmp(opType,"RegVMask")==0 || -+ strcmp(opType,"VecA")==0 || - strcmp(opType,"VecS")==0 || - strcmp(opType,"VecD")==0 || - strcmp(opType,"VecX")==0 || diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73..af54dddf3 100644 +index e30d39f73d..c640c546b1 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -200,6 +200,9 @@ void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { -- case lir_cmove: + case lir_cmove: ++#ifdef RISCV ++ assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#endif case lir_xchg: break; -@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { +@@ -252,9 +255,13 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(NULL) - , _stub(NULL) { +@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(stub->entry()) -+ , _type(type) , _block(NULL) , _ublock(NULL) - , _stub(stub) { +@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(ublock) - , _stub(NULL) -@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { +@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { } void LIR_OpBranch::negate_cond() { @@ -56194,61 +53751,66 @@ index e30d39f73..af54dddf3 100644 default: ShouldNotReachHere(); } } -@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; ++#ifdef RISCV + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); + + if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); + if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++#endif + if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +618,19 
@@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // to the result operand, otherwise the backend fails case lir_cmove: { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); ++ + do_input(op4->_opr1); + do_input(op4->_opr2); + if (op4->_opr3->is_valid()) do_input(op4->_opr3); + if (op4->_opr4->is_valid()) do_input(op4->_opr4); + do_temp(op4->_opr2); + do_output(op4->_result); ++#else + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + +@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + do_input(op2->_opr2); + do_temp(op2->_opr2); + do_output(op2->_result); ++#endif break; } -@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { +@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } ++#ifdef RISCV +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} ++#endif + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { -@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) +@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) , _file(NULL) , _line(0) #endif @@ -56259,7 +53821,7 @@ index e30d39f73..af54dddf3 100644 { } -@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { +@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { } #endif @@ -56298,45 +53860,34 @@ index e30d39f73..af54dddf3 100644 void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { +@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); ++#ifdef RISCV + 
in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); ++#endif if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { -@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { +@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { ++#ifdef RISCV + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { ++#else + if (code() == lir_cmove || code() == lir_cmp) { ++#endif print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } ++#ifdef RISCV +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); @@ -56346,68 +53897,95 @@ index e30d39f73..af54dddf3 100644 + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} ++#endif void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018..88cd3b24e 100644 +index 3234ca018b..33943e369d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -864,9 +864,11 @@ class LIR_OpConvert; - class LIR_OpAllocObj; - class LIR_OpRoundFP; - class LIR_Op2; --class LIR_OpDelay; -+class LIR_OpBranch; -+class LIR_OpDelay; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,9 @@ class LIR_Op2; + class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; ++#ifdef RISCV +class LIR_Op4; ++#endif class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; -@@ -916,8 +918,6 @@ enum LIR_Code { +@@ -916,8 +919,10 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal -- , lir_branch -- , lir_cond_float_branch ++#ifndef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_move , lir_convert , lir_alloc_object -@@ -929,11 +929,12 @@ enum LIR_Code { +@@ -929,11 +934,17 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 ++#ifdef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_cmp , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i -- , lir_cmove ++#ifndef RISCV + , lir_cmove ++#endif , lir_add , lir_sub , lir_mul -@@ -964,6 +965,9 @@ enum LIR_Code { +@@ -964,6 +975,11 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 ++#ifdef RISCV + , begin_op4 + , lir_cmove + , end_op4 ++#endif , begin_opJavaCall , lir_static_call , lir_optvirtual_call -@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { +@@ -1001,6 +1017,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#if defined(RISCV) && defined(INCLUDE_ZGC) ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } ++#ifdef RISCV + virtual LIR_Op4* as_Op4() { return NULL; } ++#endif virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { +@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall { virtual void verify() const; }; @@ -56459,7 +54037,7 @@ index 3234ca018..88cd3b24e 100644 class ConversionStub; class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { +@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: @@ -56470,8 +54048,8 @@ index 3234ca018..88cd3b24e 100644 , _opr2(opr2) - , _type(T_ILLEGAL) - , _condition(condition) -+ , _type(type) , _fpu_stack_size(0) ++ , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) @@ -56480,19 +54058,11 @@ index 3234ca018..88cd3b24e 100644 - assert(code == lir_cmp || code == lir_assert, "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(condition) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) -@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { +@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) @@ -56506,11 +54076,11 @@ index 3234ca018..88cd3b24e 100644 - assert(code != lir_cmp && 
is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { +@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) @@ -56524,32 +54094,45 @@ index 3234ca018..88cd3b24e 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { +@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#endif } void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#endif } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { +@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_OpBranch: public LIR_Op2 { ++#else ++class LIR_OpBranch: public LIR_Op { ++#endif + friend class LIR_OpVisitState; + + private: ++#ifndef RISCV ++ LIR_Condition _cond; + BasicType _type; ++#endif + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -56557,9 +54140,14 @@ index 3234ca018..88cd3b24e 100644 + + public: + LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -+ , _label(lbl) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++#else ++ : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _cond(cond) + , _type(type) ++#endif ++ , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , 
_stub(NULL) { } @@ -56570,14 +54158,14 @@ index 3234ca018..88cd3b24e 100644 + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); + -+ LIR_Condition cond() const { -+ return condition(); -+ } -+ -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); -+ } -+ ++#ifdef RISCV ++ LIR_Condition cond() const { return condition(); } ++ void set_cond(LIR_Condition cond) { set_condition(cond); } ++#else ++ LIR_Condition cond() const { return _cond; } ++ void set_cond(LIR_Condition cond) { _cond = cond; } ++#endif ++ BasicType type() const { return _type; } + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } @@ -56595,10 +54183,11 @@ index 3234ca018..88cd3b24e 100644 class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; -@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { +@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: @@ -56623,12 +54212,12 @@ index 3234ca018..88cd3b24e 100644 + , _opr3(opr3) + , _opr4(opr4) + , _type(type) -+ , _condition(condition) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) { ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { + assert(code == lir_cmove, "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } @@ -56656,10 +54245,11 @@ index 3234ca018..88cd3b24e 100644 + + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; ++#endif //-------------------------------- class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { +@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj { const char * _file; int _line; #endif @@ -56670,7 +54260,7 @@ index 3234ca018..88cd3b24e 100644 public: void append(LIR_Op* op) { -@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj { +@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj { } #endif // PRODUCT @@ -56683,7 +54273,7 @@ index 3234ca018..88cd3b24e 100644 _operations.append(op); #ifdef ASSERT -@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { +@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj { void set_file_and_line(const char * file, int line); #endif @@ -56694,37 +54284,44 @@ index 3234ca018..88cd3b24e 100644 //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } -@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { +@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj { void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++#ifdef RISCV + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new 
LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); ++ } ++#else + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); } ++#endif void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f..42a0350f7 100644 +index 160483d5f7..68aec26c1e 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- ++#ifndef RISCV + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; ++#endif + case lir_shl: case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } } ++#ifdef RISCV +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: @@ -56736,47 +54333,64 @@ index 160483d5f..42a0350f7 100644 + break; + } +} ++#endif void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe5..406a58d21 100644 +index 44a5bcbe54..baeb4aa442 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); ++#ifdef RISCV + void emit_op4(LIR_Op4* op); ++#endif void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); ++#ifdef RISCV + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - ++#else + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++#endif void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd9..d00bfe91a 100644 +index acc969ac9c..512b63c744 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1242,8 +1242,8 @@ void 
LinearScan::add_register_hints(LIR_Op* op) { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) { break; } case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; ++#else + assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); + LIR_Op2* cmove = (LIR_Op2*)op; ++#endif LIR_Opr move_from = cmove->in_opr1(); LIR_Opr move_to = cmove->result_opr(); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { +@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() { } } @@ -56786,7 +54400,7 @@ index c28055fd9..d00bfe91a 100644 { TIME_LINEAR_SCAN(timer_optimize_lir); EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { +@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() { // check that cfg is still correct after optimizations ir()->verify(); } @@ -56794,60 +54408,41 @@ index c28055fd9..d00bfe91a 100644 NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { // There might be a cmove inserted for profiling which depends on the same // compare. If we change the condition of the respective compare, we have // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; ++#ifdef RISCV + LIR_Op4* prev_cmove = NULL; ++#else + LIR_Op2* prev_cmove = NULL; ++#endif for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); // check for the cmove if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; ++#ifdef RISCV + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; ++#else + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; ++#endif assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp -index 19fe196bc..d9cb8e999 100644 ---- a/src/hotspot/share/classfile/vmSymbols.cpp -+++ b/src/hotspot/share/classfile/vmSymbols.cpp -@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_compareToL: - case vmIntrinsics::_compareToU: - case vmIntrinsics::_compareToLU: -@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - if (!SpecialStringIndexOf) return true; - break; - case vmIntrinsics::_equalsL: -diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp -index cef3f530c..a31525003 100644 ---- a/src/hotspot/share/classfile/vmSymbols.hpp -+++ b/src/hotspot/share/classfile/vmSymbols.hpp -@@ -946,6 +946,7 @@ - do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ -+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ - do_name( indexOf_name, "indexOf") \ - do_name( indexOfChar_name, "indexOfChar") \ - do_name( indexOfUL_name, "indexOfLatin1") \ diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b86..295f82ccc 100644 +index 4771a8b865..6d377fa005 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ #include "utilities/defaultStream.hpp" @@ -56857,8 +54452,31 @@ index 4771a8b86..295f82ccc 100644 vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce74243..125cc169be 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,11 @@ private: + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : ++#ifdef RISCV ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), ++#else + LIR_Op(), ++#endif + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a5..ff16de0e7 100644 +index e01a242a57..ff16de0e77 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { @@ -56870,1038 +54488,11 @@ index e01a242a5..ff16de0e7 100644 return false; #else #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp -index 7768615b7..ef006f087 100644 ---- a/src/hotspot/share/opto/c2compiler.cpp -+++ b/src/hotspot/share/opto/c2compiler.cpp -@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_toBytesStringU: - case vmIntrinsics::_getCharsStringU: - case vmIntrinsics::_getCharStringU: -diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp -index 500054218..fafbde78d 100644 ---- a/src/hotspot/share/opto/chaitin.cpp -+++ b/src/hotspot/share/opto/chaitin.cpp -@@ -77,6 +77,7 @@ void LRG::dump() const { - if( _is_oop ) tty->print("Oop "); - if( _is_float ) tty->print("Float "); - if( _is_vector ) tty->print("Vector "); -+ if( _is_scalable ) tty->print("Scalable "); - if( _was_spilled1 ) tty->print("Spilled "); - if( _was_spilled2 ) tty->print("Spilled2 "); - if( _direct_conflict ) tty->print("Direct_conflict "); -@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { - - // Merge multidefs if multiple defs representing the same value are used in a single block. - merge_multidefs(); -+ merge_debugdefs(); - - #ifdef ASSERT - // Veify the graph after RA. -@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { - // Live ranges record the highest register in their mask. - // We want the low register for the AD file writer's convenience. - OptoReg::Name hi = lrg.reg(); // Get hi register -- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo -+ int num_regs = lrg.num_regs(); -+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { -+ // For scalable vector registers, when they are allocated in physical -+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable -+ // vector. If they are allocated on stack, we need to get the actual -+ // num_regs, which reflects the physical length of scalable registers. -+ num_regs = lrg.scalable_reg_slots(); -+ } -+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. It is safe - // since for registers encoding only 'lo' is used. -@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - // Check for vector live range (only if vector register is used). 
- // On SPARC vector uses RegD which could be misaligned so it is not - // processes as vector in RA. -- if (RegMask::is_vector(ireg)) -+ if (RegMask::is_vector(ireg)) { - lrg._is_vector = 1; -+ if (ireg == Op_VecA) { -+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); -+ lrg._is_scalable = 1; -+ // For scalable vector, when it is allocated in physical register, -+ // num_regs is RegMask::SlotsPerVecA for reg mask, -+ // which may not be the actual physical register size. -+ // If it is allocated in stack, we need to get the actual -+ // physical length of scalable vector register. -+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); -+ } -+ } - assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, - "vector must be in vector registers"); - -@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - lrg.set_reg_pressure(1); - #endif - break; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); -+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); -+ lrg.set_num_regs(RegMask::SlotsPerVecA); -+ lrg.set_reg_pressure(1); -+ break; - case Op_VecS: - assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); - assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); -@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { - return false; - } - -+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { -+ int num_regs = lrg.num_regs(); -+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); -+ -+ if (lrg.is_scalable()) { -+ // a physical register is found -+ if (chunk == 0 && OptoReg::is_reg(assigned)) { -+ return assigned; -+ } -+ -+ // find available stack slots for scalable register -+ if (lrg._is_vector) { -+ num_regs = lrg.scalable_reg_slots(); -+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits -+ if (num_regs == RegMask::SlotsPerVecA) { -+ return assigned; -+ } -+ -+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it -+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits -+ // instead of SlotsPerVecA bits. -+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg -+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { -+ // Verify the found reg has scalable_reg_slots() bits set. 
-+ if (mask.is_valid_reg(assigned, num_regs)) { -+ return assigned; -+ } else { -+ // Remove more for each iteration -+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg -+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits -+ assigned = mask.find_first_set(lrg, num_regs); -+ } -+ } -+ return OptoReg::Bad; // will cause chunk change, and retry next chunk -+ } -+ } -+ -+ return assigned; -+} -+ -+ - // Choose a color using the biasing heuristic - OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - -@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - RegMask tempmask = lrg.mask(); - tempmask.AND(lrgs(copy_lrg).mask()); - tempmask.clear_to_sets(lrg.num_regs()); -- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); -+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); - if (OptoReg::is_valid(reg)) - return reg; - } -@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { - // Find an aligned set -- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); -+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); - } - - // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate -@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { - int n_regs = lrg->num_regs(); - assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); - if (n_regs == 1 || !lrg->_fat_proj) { -- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ if (Matcher::supports_scalable_vector()) { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); -+ } else { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ } - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - // For vectors and pairs, also insert the low bit of the pair -- for (int i = 1; i < n_regs; i++) -+ // We always choose the high bit, then mask the low bits by register size -+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack -+ n_regs = lrg->scalable_reg_slots(); -+ } -+ for (int i = 1; i < n_regs; i++) { - lrg->Insert(OptoReg::add(reg,-i)); -+ } - lrg->set_mask_size(n_regs); - } else { // Else fatproj - // mask must be equal to fatproj bits, by definition -diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp -index e5be5b966..b5d1b0604 100644 ---- a/src/hotspot/share/opto/chaitin.hpp -+++ b/src/hotspot/share/opto/chaitin.hpp -@@ -115,9 +115,11 @@ public: - _msize_valid=1; - if (_is_vector) { - assert(!_fat_proj, "sanity"); -- _mask.verify_sets(_num_regs); -+ if (!(_is_scalable && OptoReg::is_stack(_reg))) { -+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); -+ } - } else if (_num_regs == 2 && !_fat_proj) { -- _mask.verify_pairs(); -+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); - } - #endif - } -@@ -143,10 +145,34 @@ public: - private: - uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else - // except _num_regs is kill count for fat_proj -+ -+ // For scalable register, num_regs may not be the actual physical register size. -+ // We need to get the actual physical length of scalable register when scalable -+ // register is spilled. The size of one slot is 32-bit. -+ uint _scalable_reg_slots; // Actual scalable register length of slots. 
-+ // Meaningful only when _is_scalable is true. - public: - int num_regs() const { return _num_regs; } - void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } - -+ uint scalable_reg_slots() { return _scalable_reg_slots; } -+ void set_scalable_reg_slots(uint slots) { -+ assert(_is_scalable, "scalable register"); -+ assert(slots > 0, "slots of scalable register is not valid"); -+ _scalable_reg_slots = slots; -+ } -+ -+ bool is_scalable() { -+#ifdef ASSERT -+ if (_is_scalable) { -+ // Should only be a vector for now, but it could also be a RegVMask in future. -+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); -+ } -+#endif -+ return _is_scalable; -+ } -+ -+ - private: - // Number of physical registers this live range uses when it colors - // Architecture and register-set dependent -@@ -172,6 +198,7 @@ public: - uint _is_oop:1, // Live-range holds an oop - _is_float:1, // True if in float registers - _is_vector:1, // True if in vector registers -+ _is_scalable:1, // True if register size is scalable - _was_spilled1:1, // True if prior spilling on def - _was_spilled2:1, // True if twice prior spilling on def - _is_bound:1, // live range starts life with no -@@ -756,6 +783,7 @@ private: - - // Merge nodes that are a part of a multidef lrg and produce the same value within a block. - void merge_multidefs(); -+ void merge_debugdefs(); - - private: - -diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp -index c0dfe1b0c..2d9526a39 100644 ---- a/src/hotspot/share/opto/intrinsicnode.hpp -+++ b/src/hotspot/share/opto/intrinsicnode.hpp -@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { - // Base class for Ideal nodes used in String intrinsic code. - class StrIntrinsicNode: public Node { - public: -- // Possible encodings of the two parameters passed to the string intrinsic. -+ // Possible encodings of the parameters passed to the string intrinsic. - // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that - // the first string is Latin1 encoded and the second string is UTF16 encoded. -- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; -+ // 'L' means that the single string is Latin1 encoded -+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; - - protected: - // Encoding of strings. Used to select the right version of the intrinsic. 
-diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp -index 6b6aa9e9b..8719c5b12 100644 ---- a/src/hotspot/share/opto/library_call.cpp -+++ b/src/hotspot/share/opto/library_call.cpp -@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { - bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); - Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, - RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); -- bool inline_string_indexOfChar(); -+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); - bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); - bool inline_string_toBytesU(); - bool inline_string_getCharsU(); -@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { - case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); - case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); - case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); -- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); -+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); -+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); - - case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); - case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); -@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* - } - - //-----------------------------inline_string_indexOfChar----------------------- --bool LibraryCallKit::inline_string_indexOfChar() { -+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { - if (too_many_traps(Deoptimization::Reason_intrinsic)) { - return false; - } -@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { - - src = must_be_not_null(src, true); - -- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); -+ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); - Node* src_start = array_element_address(src, src_offset, T_BYTE); - Node* src_count = _gvn.transform(new SubINode(max, from_index)); - - // Range checks -- generate_string_range_check(src, src_offset, src_count, true); -+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); - if (stopped()) { - return true; - } -@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { - RegionNode* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::INT); - -- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); -+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); - C->set_has_split_ifs(true); // Has chance for split-if optimization - _gvn.transform(result); - -diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp -index 8d526b15d..92b4f7158 100644 ---- a/src/hotspot/share/opto/machnode.cpp -+++ b/src/hotspot/share/opto/machnode.cpp -@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { - return MachNode::emit_size(ra_); - } - --//------------------------------size------------------------------------------- -+//-------------------------emit_size------------------------------------------- - // Helper function that computes size by emitting code - uint MachNode::emit_size(PhaseRegAlloc *ra_) const { - // Emit into a trash buffer and count bytes emitted. -diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index a52325680..dad70565b 100644 ---- a/src/hotspot/share/opto/machnode.hpp -+++ b/src/hotspot/share/opto/machnode.hpp -@@ -334,6 +334,10 @@ public: - // Top-level ideal Opcode matched - virtual int ideal_Opcode() const { return Op_Node; } - -+ virtual bool is_Opcode_equal(Node* node) { -+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); -+ } -+ - // Adds the label for the case - virtual void add_case_label( int switch_val, Label* blockLabel); - -diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 9e9b3383f..97de5e314 100644 ---- a/src/hotspot/share/opto/matcher.cpp -+++ b/src/hotspot/share/opto/matcher.cpp -@@ -84,6 +84,7 @@ Matcher::Matcher() - idealreg2spillmask [Op_RegF] = NULL; - idealreg2spillmask [Op_RegD] = NULL; - idealreg2spillmask [Op_RegP] = NULL; -+ idealreg2spillmask [Op_VecA] = NULL; - idealreg2spillmask [Op_VecS] = NULL; - idealreg2spillmask [Op_VecD] = NULL; - idealreg2spillmask [Op_VecX] = NULL; -@@ -110,6 +111,7 @@ Matcher::Matcher() - idealreg2mhdebugmask[Op_RegF] = NULL; - idealreg2mhdebugmask[Op_RegD] = NULL; - idealreg2mhdebugmask[Op_RegP] = NULL; -+ idealreg2mhdebugmask[Op_VecA] = NULL; - idealreg2mhdebugmask[Op_VecS] = NULL; - idealreg2mhdebugmask[Op_VecD] = NULL; - idealreg2mhdebugmask[Op_VecX] = NULL; -@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { - void Matcher::init_first_stack_mask() { - - // Allocate storage for spill masks as masks for the appropriate load type. 
-- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); -+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); - - idealreg2spillmask [Op_RegN] = &rms[0]; - idealreg2spillmask [Op_RegI] = &rms[1]; -@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { - idealreg2mhdebugmask[Op_RegD] = &rms[16]; - idealreg2mhdebugmask[Op_RegP] = &rms[17]; - -- idealreg2spillmask [Op_VecS] = &rms[18]; -- idealreg2spillmask [Op_VecD] = &rms[19]; -- idealreg2spillmask [Op_VecX] = &rms[20]; -- idealreg2spillmask [Op_VecY] = &rms[21]; -- idealreg2spillmask [Op_VecZ] = &rms[22]; -+ idealreg2spillmask [Op_VecA] = &rms[18]; -+ idealreg2spillmask [Op_VecS] = &rms[19]; -+ idealreg2spillmask [Op_VecD] = &rms[20]; -+ idealreg2spillmask [Op_VecX] = &rms[21]; -+ idealreg2spillmask [Op_VecY] = &rms[22]; -+ idealreg2spillmask [Op_VecZ] = &rms[23]; - - OptoReg::Name i; - -@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { - // Keep spill masks aligned. - aligned_stack_mask.clear_to_pairs(); - assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); -+ RegMask scalable_stack_mask = aligned_stack_mask; - - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - #ifdef _LP64 -@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { - *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; - idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); - } -+ -+ if (Matcher::supports_scalable_vector()) { -+ int k = 1; -+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); -+ // Exclude last input arg stack slots to avoid spilling vector register there, -+ // otherwise vector spills could stomp over stack slots in caller frame. -+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { -+ scalable_stack_mask.Remove(in); -+ in = OptoReg::add(in, -1); -+ } -+ -+ // For VecA -+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); -+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); -+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; -+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); -+ } else { -+ *idealreg2spillmask[Op_VecA] = RegMask::Empty; -+ } -+ - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. -@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { - idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); - - // Vector regmasks. -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; -+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); -+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); -+ } - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE, 4); - MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); -diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index 244e3d1f8..9a8307102 100644 ---- a/src/hotspot/share/opto/matcher.hpp -+++ b/src/hotspot/share/opto/matcher.hpp -@@ -310,7 +310,7 @@ public: - - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- static const bool match_rule_supported_vector(int opcode, int vlen); -+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void); -@@ -333,6 +333,10 @@ public: - Matcher::min_vector_size(bt) <= size); - } - -+ static const bool supports_scalable_vector(); -+ // Actual max scalable vector register length. -+ static const int scalable_vector_reg_size(const BasicType bt); -+ - // Vector ideal reg - static const uint vector_ideal_reg(int len); - static const uint vector_shift_count_ideal_reg(int len); -diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index 02bb6bb16..99d51ba05 100644 ---- a/src/hotspot/share/opto/node.cpp -+++ b/src/hotspot/share/opto/node.cpp -@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { - return NULL; - } - -+//--------------------------is_similar----------------------------------- -+// True if a node has the same opcode and inputs as "this". -+bool Node::is_similar(Node* node) { -+ if (this == node) { -+ return true; -+ } else { -+ if (is_Opcode_equal(node) && (req() == node->req())) { -+ for (uint i = 0; i < node->req(); i++) { -+ if (in(i) != node->in(i)) { -+ return false; -+ } -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ -+bool Node::is_Opcode_equal(Node* node) { -+ return Opcode() == node->Opcode(); -+} - - //--------------------------unique_ctrl_out------------------------------ - // Return the unique control out if only one. Null if none or more than one. -diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 0c0b9bf69..e24456d85 100644 ---- a/src/hotspot/share/opto/node.hpp -+++ b/src/hotspot/share/opto/node.hpp -@@ -1030,6 +1030,11 @@ public: - // be found; Otherwise return NULL; - Node* find_similar(int opc); - -+ // True if a node has the same opcode and inputs as "this". -+ bool is_similar(Node* node); -+ -+ virtual bool is_Opcode_equal(Node* node); -+ - // Return the unique control out if only one. Null if none or more than one. 
- Node* unique_ctrl_out() const; - -diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp -index e31e8d847..aa0483c73 100644 ---- a/src/hotspot/share/opto/opcodes.cpp -+++ b/src/hotspot/share/opto/opcodes.cpp -@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { - "RegF", - "RegD", - "RegL", -- "RegFlags", -+ "VecA", - "VecS", - "VecD", - "VecX", - "VecY", - "VecZ", -+ "RegVMask", -+ "RegFlags", - "_last_machine_leaf", - #include "classes.hpp" - "_last_class_name", -diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp -index ae3d61ce0..0a77c3732 100644 ---- a/src/hotspot/share/opto/opcodes.hpp -+++ b/src/hotspot/share/opto/opcodes.hpp -@@ -37,11 +37,13 @@ enum Opcodes { - macro(RegF) // Machine float register - macro(RegD) // Machine double register - macro(RegL) // Machine long register -+ macro(VecA) // Machine vectora register - macro(VecS) // Machine vectors register - macro(VecD) // Machine vectord register - macro(VecX) // Machine vectorx register - macro(VecY) // Machine vectory register - macro(VecZ) // Machine vectorz register -+ macro(RegVMask) // Vector mask/predicate register - macro(RegFlags) // Machine flags register - _last_machine_leaf, // Split between regular opcodes and machine - #include "classes.hpp" -diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp -index 397a53713..89c7fc7c8 100644 ---- a/src/hotspot/share/opto/phase.cpp -+++ b/src/hotspot/share/opto/phase.cpp -@@ -113,6 +113,7 @@ void Phase::print_timers() { - tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); - tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); - tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); -+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); - tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); - tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); - tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); -@@ -130,6 +131,7 @@ void Phase::print_timers() { - timers[_t_regAllocSplit].seconds() + - timers[_t_postAllocCopyRemoval].seconds() + - timers[_t_mergeMultidefs].seconds() + -+ timers[_t_mergeDebugdefs].seconds() + - timers[_t_fixupSpills].seconds() + - timers[_t_chaitinCompact].seconds() + - timers[_t_chaitinCoalesce1].seconds() + -diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp -index 4b0c53ffc..b3302ec86 100644 ---- a/src/hotspot/share/opto/phase.hpp -+++ b/src/hotspot/share/opto/phase.hpp -@@ -91,6 +91,7 @@ public: - _t_regAllocSplit, - _t_postAllocCopyRemoval, - _t_mergeMultidefs, -+ _t_mergeDebugdefs, - _t_fixupSpills, - _t_chaitinCompact, - _t_chaitinCoalesce1, -diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp -index 46766b604..3f608bb40 100644 ---- a/src/hotspot/share/opto/postaloc.cpp -+++ b/src/hotspot/share/opto/postaloc.cpp -@@ -27,6 +27,7 @@ - #include "memory/resourceArea.hpp" - #include "opto/chaitin.hpp" - #include "opto/machnode.hpp" -+#include "opto/addnode.hpp" - - // See if this register (or pairs, or vector) already contains the value. - static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, -@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - Node *val = skip_copies(n->in(k)); - if (val == x) return blk_adjust; // No progress? 
- -- int n_regs = RegMask::num_registers(val->ideal_reg()); - uint val_idx = _lrg_map.live_range_id(val); - OptoReg::Name val_reg = lrgs(val_idx).reg(); -+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); - - // See if it happens to already be in the correct register! - // (either Phi's direct register, or the common case of the name -@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - } - - Node *vv = value[reg]; -+ // For scalable register, number of registers may be inconsistent between -+ // "val_reg" and "reg". For example, when "val" resides in register -+ // but "reg" is located in stack. -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ if (OptoReg::is_stack(reg)) { -+ n_regs = lrgs(val_idx).scalable_reg_slots(); -+ } else { -+ n_regs = RegMask::SlotsPerVecA; -+ } -+ } - if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set -- uint last = (n_regs-1); // Looking for the last part of a set -+ uint last; -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned -+ last = RegMask::SlotsPerVecA - 1; -+ } else { -+ last = (n_regs-1); // Looking for the last part of a set -+ } - if ((reg&last) != last) continue; // Wrong part of a set - if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value - } -@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { - } - } - -+void PhaseChaitin::merge_debugdefs() { -+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); -+ -+ ResourceMark rm; -+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { -+ Block* block = _cfg.get_block(i); -+ for (int j = 0; j < (int) block->number_of_nodes(); j++) { -+ Node* base = block->get_node(j); -+ if (base && base->is_Mach() && base->outcnt() == 1) { -+ Node* addp = base->unique_out(); -+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { -+ Node* derived = addp->in(AddPNode::Address); -+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { -+ base->subsume_by(derived, Compile::current()); -+ block->remove_node(j--); -+ } -+ } -+ } -+ } -+ } -+} -+ - int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { - int blk_adjust = 0; - -@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - uint k; - Node *phi = block->get_node(j); - uint pidx = _lrg_map.live_range_id(phi); -- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); -+ OptoReg::Name preg = lrgs(pidx).reg(); - - // Remove copies remaining on edges. Check for junk phi. 
- Node *u = NULL; -@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - if( pidx ) { - value.map(preg,phi); - regnd.map(preg,phi); -- int n_regs = RegMask::num_registers(phi->ideal_reg()); -+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name preg_lo = OptoReg::add(preg,-l); - value.map(preg_lo,phi); -@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - regnd.map(ureg, def); - // Record other half of doubles - uint def_ideal_reg = def->ideal_reg(); -- int n_regs = RegMask::num_registers(def_ideal_reg); -+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); - if (!value[ureg_lo] && -@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - } - - uint n_ideal_reg = n->ideal_reg(); -- int n_regs = RegMask::num_registers(n_ideal_reg); -+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); - if (n_regs == 1) { - // If Node 'n' does not change the value mapped by the register, - // then 'n' is a useless copy. Do not update the register->node -diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp -index 2e04c42eb..34a701e84 100644 ---- a/src/hotspot/share/opto/regmask.cpp -+++ b/src/hotspot/share/opto/regmask.cpp -@@ -24,6 +24,7 @@ - - #include "precompiled.hpp" - #include "opto/ad.hpp" -+#include "opto/chaitin.hpp" - #include "opto/compile.hpp" - #include "opto/matcher.hpp" - #include "opto/node.hpp" -@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( - - //============================================================================= - bool RegMask::is_vector(uint ireg) { -- return (ireg == Op_VecS || ireg == Op_VecD || -+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || - ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); - } - - int RegMask::num_registers(uint ireg) { - switch(ireg) { - case Op_VecZ: -- return 16; -+ return SlotsPerVecZ; - case Op_VecY: -- return 8; -+ return SlotsPerVecY; - case Op_VecX: -- return 4; -+ return SlotsPerVecX; - case Op_VecD: -+ return SlotsPerVecD; - case Op_RegD: - case Op_RegL: - #ifdef _LP64 - case Op_RegP: - #endif - return 2; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ return SlotsPerVecA; - } - // Op_VecS and the rest ideal registers. - return 1; - } - -+int RegMask::num_registers(uint ireg, LRG &lrg) { -+ int n_regs = num_registers(ireg); -+ -+ // assigned is OptoReg which is selected by register allocator -+ OptoReg::Name assigned = lrg.reg(); -+ assert(OptoReg::is_valid(assigned), "should be valid opto register"); -+ -+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { -+ n_regs = lrg.scalable_reg_slots(); -+ } -+ return n_regs; -+} -+ - //------------------------------find_first_pair-------------------------------- - // Find the lowest-numbered register pair in the mask. Return the - // HIGHEST register number in the pair, or BAD if no pairs. -@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { - return true; - } - -+// Check that whether given reg number with size is valid -+// for current regmask, where reg is the highest number. 
-+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { -+ for (int i = 0; i < size; i++) { -+ if (!Member(reg - i)) { -+ return false; -+ } -+ } -+ return true; -+} -+ - // only indicies of power 2 are accessed, so index 3 is only filled in for storage. - static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; - //------------------------------find_first_set--------------------------------- - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Works also for size 1. --OptoReg::Name RegMask::find_first_set(const int size) const { -- verify_sets(size); -+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { -+ if (lrg.is_scalable()) { -+ // For scalable vector register, regmask is SlotsPerVecA bits aligned. -+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); -+ } else { -+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); -+ } - for (int i = 0; i < RM_SIZE; i++) { - if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d08795..2688275be 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -28,6 +28,8 @@ - #include "code/vmreg.hpp" - #include "opto/optoreg.hpp" - -+class LRG; -+ - // Some fun naming (textual) substitutions: - // - // RegMask::get_low_elem() ==> RegMask::find_first_elem() -@@ -95,6 +97,7 @@ public: - // requirement is internal to the allocator, and independent of any - // particular platform. - enum { SlotsPerLong = 2, -+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), - SlotsPerVecS = 1, - SlotsPerVecD = 2, - SlotsPerVecX = 4, -@@ -204,10 +207,14 @@ public: - return false; - } - -+ // Check that whether given reg number with size is valid -+ // for current regmask, where reg is the highest number. -+ bool is_valid_reg(OptoReg::Name reg, const int size) const; -+ - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Assert that the mask contains only bit sets. -- OptoReg::Name find_first_set(const int size) const; -+ OptoReg::Name find_first_set(LRG &lrg, const int size) const; - - // Clear out partial bits; leave only aligned adjacent bit sets of size. - void clear_to_sets(const int size); -@@ -226,6 +233,7 @@ public: - - static bool is_vector(uint ireg); - static int num_registers(uint ireg); -+ static int num_registers(uint ireg, LRG &lrg); - - // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { -diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index fed52e488..ee583236f 100644 ---- a/src/hotspot/share/opto/superword.cpp -+++ b/src/hotspot/share/opto/superword.cpp -@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz - //------------------------------transform_loop--------------------------- - void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { - assert(UseSuperWord, "should be"); -- // Do vectors exist on this architecture? -- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; -+ // SuperWord only works with power of two vector sizes. 
-+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); -+ if (vector_width < 2 || !is_power_of_2(vector_width)) { -+ return; -+ } - - assert(lpt->_head->is_CountedLoop(), "must be"); - CountedLoopNode *cl = lpt->_head->as_CountedLoop(); -diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 7d767c47c..c9948df5f 100644 ---- a/src/hotspot/share/opto/type.cpp -+++ b/src/hotspot/share/opto/type.cpp -@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { - { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY - { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ - #else // all other -+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA - { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS - { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX -@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { - // get_zero_type() should not happen for T_CONFLICT - _zero_type[T_CONFLICT]= NULL; - -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); -+ } -+ - // Vector predefined types, it needs initialized _const_basic_type[]. - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE,4); -@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { - if (Matcher::vector_size_supported(T_FLOAT,16)) { - TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); - } -+ mreg2type[Op_VecA] = TypeVect::VECTA; - mreg2type[Op_VecS] = TypeVect::VECTS; - mreg2type[Op_VecD] = TypeVect::VECTD; - mreg2type[Op_VecX] = TypeVect::VECTX; -@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { - - Bad, // Tuple - handled in v-call - Bad, // Array - handled in v-call -+ Bad, // VectorA - handled in v-call - Bad, // VectorS - handled in v-call - Bad, // VectorD - handled in v-call - Bad, // VectorX - handled in v-call -@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { - - //==============================TypeVect======================================= - // Convenience common pre-built types. 
-+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic - const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors - const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors - const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors -@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors - const TypeVect* TypeVect::make(const Type *elem, uint length) { - BasicType elem_bt = elem->array_element_basic_type(); - assert(is_java_primitive(elem_bt), "only primitive types in vector"); -- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); - assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); - int size = length * type2aelembytes(elem_bt); - switch (Matcher::vector_ideal_reg(size)) { -+ case Op_VecA: -+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); - case Op_VecS: - return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); - case Op_RegL: -@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { - default: // All else is a mistake - typerr(t); - -+ case VectorA: - case VectorS: - case VectorD: - case VectorX: -@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { - #ifndef PRODUCT - void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { - switch (base()) { -+ case VectorA: -+ st->print("vectora["); break; - case VectorS: - st->print("vectors["); break; - case VectorD: -diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp -index 27d042d94..82ee2dfcb 100644 ---- a/src/hotspot/share/opto/type.hpp -+++ b/src/hotspot/share/opto/type.hpp -@@ -53,6 +53,7 @@ class TypeNarrowKlass; - class TypeAry; - class TypeTuple; - class TypeVect; -+class TypeVectA; - class TypeVectS; - class TypeVectD; - class TypeVectX; -@@ -87,6 +88,7 @@ public: - - Tuple, // Method signature or object layout - Array, // Array types -+ VectorA, // (Scalable) Vector types for vector length agnostic - VectorS, // 32bit Vector types - VectorD, // 64bit Vector types - VectorX, // 128bit Vector types -@@ -769,6 +771,7 @@ public: - virtual const Type *xmeet( const Type *t) const; - virtual const Type *xdual() const; // Compute dual right now. - -+ static const TypeVect *VECTA; - static const TypeVect *VECTS; - static const TypeVect *VECTD; - static const TypeVect *VECTX; -@@ -780,6 +783,11 @@ public: - #endif - }; - -+class TypeVectA : public TypeVect { -+ friend class TypeVect; -+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} -+}; -+ - class TypeVectS : public TypeVect { - friend class TypeVect; - TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} -@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { - } - - inline const TypeVect *Type::is_vect() const { -- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); -+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); - return (TypeVect*)this; - } - - inline const TypeVect *Type::isa_vect() const { -- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; -+ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; - } - - inline const TypePtr *Type::is_ptr() const { -diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index de22591ba..b82d631f4 100644 ---- a/src/hotspot/share/opto/vectornode.cpp -+++ b/src/hotspot/share/opto/vectornode.cpp -@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, bt); -- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); -+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = ReductionNode::opcode(opc, bt); -- return vopc != opc && Matcher::match_rule_supported(vopc); -+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2b..ee769634f 100644 +index a383297611..5e9228e705 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; - #ifdef ZERO - #define VMTYPE "Zero" - #else // ZERO -- #define VMTYPE COMPILER1_PRESENT("Client") \ -- COMPILER2_PRESENT("Server") -+ #ifdef COMPILER2 -+ #define VMTYPE "Server" -+ #elif defined(COMPILER1) -+ #define VMTYPE "Client" -+ #else -+ #define VMTYPE "Core" -+ #endif // COMPILER2 - #endif // ZERO - #endif // TIERED - #endif -@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { IA32_ONLY("x86") \ IA64_ONLY("ia64") \ S390_ONLY("s390") \ @@ -57912,10 +54503,10 @@ index c46247f2b..ee769634f 100644 #endif // !CPU diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index 0a9c45f85..a96c2dd81 100644 +index 34c8d98362..7cf95058fe 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { +@@ -1259,7 +1259,7 @@ class JavaThread: public Thread { address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support @@ -57925,35 +54516,30 @@ index 0a9c45f85..a96c2dd81 100644 void set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f7..aa71d7655 100644 +index dee8534f73..9af07aeb45 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { set_has_async_exception(); } -#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } -diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp -index 0b898dcc3..7f76486ae 100644 ---- a/src/hotspot/share/utilities/debug.cpp -+++ b/src/hotspot/share/utilities/debug.cpp -@@ -632,6 +632,7 @@ void help() { - tty->print_cr(" pns($sp, $rbp, $pc) on Linux/amd64 and Solaris/amd64 or"); - tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); - tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); -+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); - tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); - tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); - tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf8025386..e8ab3097a 100644 +index 6605ab367c..7f1bcff6b3 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ +@@ -601,6 +601,32 @@ #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) @@ -57986,26 +54572,17 @@ index cf8025386..e8ab3097a 100644 #ifdef VM_LITTLE_ENDIAN #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) -diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java -index 063a5ef3a..50e9cdb57 100644 ---- a/src/java.base/share/classes/java/lang/StringLatin1.java -+++ b/src/java.base/share/classes/java/lang/StringLatin1.java -@@ -209,6 +209,11 @@ final class StringLatin1 { - // Note: fromIndex might be near -1>>>1. - return -1; - } -+ return indexOfChar(value, ch, fromIndex, max); -+ } -+ -+ @HotSpotIntrinsicCandidate -+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - byte c = (byte)ch; - for (int i = fromIndex; i < max; i++) { - if (value[i] == c) { diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c..55a7b96f7 100644 +index 0d834302c5..45a927fb5e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif @@ -58022,71 +54599,76 @@ index 0d834302c..55a7b96f7 100644 } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { -@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #ifdef aarch64 - #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif -@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; + -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg -+ -+ { -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = 
gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ } +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e02..9d7fda8a6 100644 +index 8318e8e021..ab092d4ee3 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -43,6 +43,8 @@ #elif defined(arm) #include @@ -58096,41 +54678,11 @@ index 8318e8e02..9d7fda8a6 100644 #endif // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d85..12eafc455 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) - #endif - -+// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case -+// like other platforms, so call ptrace with PTRACE_GETREGSET here. -+#ifndef riscv64 - #if defined(_LP64) && defined(PTRACE_GETREGS64) - #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 - #elif defined(PTRACE_GETREGS) -@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #elif defined(PT_GETREGS) - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif -+#endif - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c..82c083055 100644 +index 0f5f0119c7..9bff9ee9b1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -1,6 +1,7 @@ - /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; @@ -58138,24 +54690,24 @@ index 0f5f0119c..82c083055 100644 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -592,6 +594,8 @@ public class HotSpotAgent { - machDesc = new MachineDescriptionPPC64(); - } else if (cpu.equals("aarch64")) { - machDesc = new MachineDescriptionAArch64(); +@@ -598,6 +599,8 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); - } else if (cpu.equals("sparc")) { - if (LinuxDebuggerLocal.getAddressSize()==8) { - machDesc = new MachineDescriptionSPARC64Bit(); + } else { + try { + machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 000000000..4221937f1 +index 0000000000..a972516dee --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58194,18 +54746,24 @@ index 000000000..4221937f1 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb71..acd5844ca 100644 +index 5e5a6bb714..dc0bcb3da9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.x86.*; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; -@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; @@ -58231,7 +54789,7 @@ index 5e5a6bb71..acd5844ca 100644 return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 -index 000000000..eaef586b4 +index 0000000000..f06da24bd0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ @@ -58327,7 +54885,7 @@ index 000000000..eaef586b4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 000000000..4789e664c +index 0000000000..fdb841ccf3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58379,39 +54937,9 @@ index 000000000..4789e664c + return debugger.newAddress(getRegister(index)); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -index 74e957d94..1f44d75ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; - import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.proc.amd64.*; - import sun.jvm.hotspot.debugger.proc.aarch64.*; -+import sun.jvm.hotspot.debugger.proc.riscv64.*; - import sun.jvm.hotspot.debugger.proc.sparc.*; - import sun.jvm.hotspot.debugger.proc.ppc64.*; - import sun.jvm.hotspot.debugger.proc.x86.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.utilities.*; -@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { - threadFactory = new ProcAARCH64ThreadFactory(this); - pcRegIndex = AARCH64ThreadContext.PC; - fpRegIndex = AARCH64ThreadContext.FP; -+ } else if (cpu.equals("riscv64")) { -+ threadFactory = new ProcRISCV64ThreadFactory(this); -+ pcRegIndex = RISCV64ThreadContext.PC; -+ fpRegIndex = RISCV64ThreadContext.FP; - } else if (cpu.equals("ppc64")) { - threadFactory = new ProcPPC64ThreadFactory(this); - pcRegIndex = PPC64ThreadContext.PC; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 -index 000000000..c1cf1fb0f +index 0000000000..96d5dee47c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ @@ -58505,7 +55033,7 @@ index 000000000..c1cf1fb0f +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java new file mode 100644 -index 000000000..498fa0dc6 +index 0000000000..f2aa845e66 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58559,7 +55087,7 @@ index 000000000..498fa0dc6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java new file mode 100644 -index 000000000..81afd8fdc +index 0000000000..19f64b8ce2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58611,7 +55139,7 @@ index 000000000..81afd8fdc +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 000000000..ab92e3e74 +index 0000000000..aecbda5902 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ @@ -58672,7 +55200,7 @@ index 000000000..ab92e3e74 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 -index 000000000..1e8cd19b2 +index 0000000000..1d3da6be5a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58726,7 +55254,7 @@ index 000000000..1e8cd19b2 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java new file mode 100644 -index 000000000..eecb6e029 +index 0000000000..725b94e25a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58778,7 +55306,7 @@ index 000000000..eecb6e029 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java new file mode 100644 -index 000000000..426ff0580 +index 0000000000..fb60a70427 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ @@ -58828,9 +55356,9 @@ index 000000000..426ff0580 + // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; ++ // union __riscv_fp_state sc_fpregs; + // }; -+ // ++ // + // struct user_regs_struct { + // unsigned long 
pc; + // unsigned long ra; @@ -58955,9 +55483,16 @@ index 000000000..426ff0580 + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785..74bd614d3 100644 +index 190062785a..89d676fe3b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; @@ -58977,7 +55512,7 @@ index 190062785..74bd614d3 100644 access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 -index 000000000..2df0837b6 +index 0000000000..5c2b6e0e3e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ @@ -59115,7 +55650,7 @@ index 000000000..2df0837b6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 -index 000000000..a3bbf1ad1 +index 0000000000..34701c6922 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ @@ -59344,14 +55879,14 @@ index 000000000..a3bbf1ad1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 -index 000000000..c04def5a1 +index 0000000000..e372bc5f7b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59735,11 +56270,11 @@ index 000000000..c04def5a1 + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. 
-+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. @@ -59904,10 +56439,10 @@ index 000000000..c04def5a1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java new file mode 100644 -index 000000000..4d79e3ee4 +index 0000000000..850758a7ed --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,58 @@ +@@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59940,6 +56475,7 @@ index 000000000..4d79e3ee4 +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; + +public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; @@ -59968,7 +56504,7 @@ index 000000000..4d79e3ee4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java new file mode 100644 -index 000000000..d7187a5f8 +index 0000000000..4aeb1c6f55 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ @@ -60026,9 +56562,16 @@ index 000000000..d7187a5f8 + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107c..948eabcab 100644 +index 7d7a6107ca..6552ce255f 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -54,7 +54,7 @@ public class PlatformInfo { public static boolean knownCPU(String cpu) { @@ -60038,42 +56581,18 @@ index 7d7a6107c..948eabcab 100644 for(String s : KNOWN) { if(s.equals(cpu)) -diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c -index d0a6f4ea8..a29c7bf8b 100644 ---- a/src/utils/hsdis/hsdis.c -+++ b/src/utils/hsdis/hsdis.c -@@ -28,9 +28,6 @@ - */ - - #include /* required by bfd.h */ --#include --#include --#include - - #include - #include -@@ -479,6 +476,9 @@ static const char* native_arch_name() { - #endif - #ifdef LIBARCH_s390x - res = "s390:64-bit"; -+#endif -+#ifdef LIBARCH_riscv64 -+ res = "riscv:rv64"; - #endif - if (res == NULL) - res = "architecture not set in Makefile!"; diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c2..a21307083 100644 +index 7805918c28..823b9f39db 100644 --- a/test/hotspot/jtreg/compiler/c2/TestBit.java +++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; +@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools; * * @run driver compiler.c2.TestBit * @@ -60082,7 +56601,7 @@ index 7805918c2..a21307083 100644 * @requires vm.debug == true & vm.compiler2.enabled */ public class TestBit { -@@ -54,7 +55,8 @@ public class TestBit { +@@ -54,7 +54,8 @@ public class TestBit { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : @@ -60092,26 +56611,112 @@ index 7805918c2..a21307083 100644 if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +new file mode 100644 +index 0000000000..5a1b659bbe +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics ++ */ ++ ++package compiler.floatingpoint; ++ ++import compiler.whitebox.CompilerWhiteBoxTest; ++import sun.hotspot.WhiteBox; ++ ++import java.lang.reflect.Method; ++ ++public class TestLibmIntrinsics { ++ ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++ ++ private static final double pi = 3.1415926; ++ ++ private static final double expected = 2.5355263553695413; ++ ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ } ++ ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ ++ double interpreter_result = m(); ++ ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ ++ double c1_result = m(); ++ ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); ++ } ++ } ++} diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f..9d875e33f 100644 +index 558b4218f0..55374b116e 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), @@ -60121,25 +56726,25 @@ index 558b4218f..9d875e33f 100644 SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a..a7e277060 100644 +index 3ed72bf0a9..8fb82ee453 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), @@ -60149,25 +56754,25 @@ index 3ed72bf0a..a7e277060 100644 SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309d..e714fcc59 100644 +index c05cf309da..aca32137ed 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), @@ -60177,25 +56782,25 @@ index c05cf309d..e714fcc59 100644 SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366b..d52d81e26 100644 +index 58ce5366ba..8deac4f789 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; - +@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + +@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION), @@ -60205,17 +56810,17 @@ index 58ce5366b..d52d81e26 100644 SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae..50e549069 100644 +index faa9fdbae6..2663500204 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; +@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; /** * Generic test case for SHA-related options targeted to any CPU except @@ -60243,19 +56848,19 @@ index faa9fdbae..50e549069 100644 String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of // SHA-related options will not cause any warnings. CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 000000000..d81b5b53f +index 0000000000..8566d57c39 --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,102 @@ +@@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60292,10 +56897,19 @@ index 000000000..d81b5b53f + */ +public class GenericTestCaseForUnsupportedRISCV64CPU extends + SHAOptionsBase.TestCase { ++ ++ final private boolean checkUseSHA; ++ + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, + new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); ++ ++ this.checkUseSHA = checkUseSHA; + } + + @Override @@ -60309,22 +56923,24 @@ index 000000000..d81b5b53f + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ SHAOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } + } + } + @@ -60336,188 +56952,38 @@ index 000000000..d81b5b53f + optionName), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ -+ // Verify that option is disabled when +UseSHA was passed to JVM. 
-+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)); -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -new file mode 100644 -index 000000000..d3aafec8e ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -@@ -0,0 +1,153 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+ -+/* -+ * @test -+ * @bug 8173585 -+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that -+ * differing code paths are taken contingent upon the length of the input String. -+ * Hence we must test against differing string lengths in order to validate -+ * correct functionality. We also ensure the strings are long enough to trigger -+ * the looping conditions of the individual code paths. 
-+ * -+ * Run with varing levels of AVX and SSE support, also without the intrinsic at all -+ * -+ * @library /compiler/patches /test/lib -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ */ -+ -+package compiler.intrinsics.string; -+ -+import jdk.test.lib.Asserts; -+ -+public class TestStringLatin1IndexOfChar{ -+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions -+ -+ public static void main(String[] args) throws Exception { -+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... -+ findOneItem(); -+ withOffsetTest(); -+ testEmpty(); -+ } -+ } -+ -+ private static void testEmpty(){ -+ Asserts.assertEQ("".indexOf('a'), -1); -+ } -+ -+ private final static char SEARCH_CHAR = 'z'; -+ private final static char INVERLEAVING_CHAR = 'a'; -+ private final static char MISSING_CHAR = 'd'; -+ -+ private static void findOneItem(){ -+ //test strings of varying length ensuring that for all lengths one instance of the -+ //search char can be found. We check what happens when the search character is in -+ //each position of the search string (including first and last positions) -+ for(int strLength : new int[]{1, 15, 31, 32, 79}){ -+ for(int searchPos = 0; searchPos < strLength; searchPos++){ -+ String totest = makeOneItemStringLatin1(strLength, searchPos); -+ -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ Asserts.assertEQ(intri, nonintri); -+ } -+ } -+ } -+ -+ private static String makeOneItemStringLatin1(int length, int searchPos){ -+ StringBuilder sb = new StringBuilder(length); -+ -+ for(int n =0; n < length; n++){ -+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); -+ } -+ -+ return sb.toString(); -+ } -+ -+ private static void withOffsetTest(){ -+ //progressivly move through string checking indexes and starting offset correctly processed -+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc -+ //we find n s.t. 
maxlength = (n*3) + 2 -+ int maxaInstances = (MAX_LENGTH-2)/3; -+ -+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ -+ String totest = makeWithOffsetStringLatin1(aInstances); -+ -+ int startoffset; -+ { -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ { -+ int intri = totest.indexOf(SEARCH_CHAR, startoffset); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string -+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); -+ } -+ } -+ -+ private static String makeWithOffsetStringLatin1(int aInstances){ -+ StringBuilder sb = new StringBuilder((aInstances*3) + 2); -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ return sb.toString(); -+ } ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + -+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { -+ //non intrinsic version of indexOfChar -+ byte c = (byte)ch; -+ for (int i = fromIndex; i < value.length(); i++) { -+ if (value.charAt(i) == c) { -+ return i; -+ } ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } -+ return -1; + } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a..8093d6598 100644 +index 2e3e2717a6..7be8af6d03 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60528,9 +56994,16 @@ index 2e3e2717a..8093d6598 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e43..1ff9f36e1 100644 +index 0e06a9e432..797927b42b 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60541,9 +57014,16 @@ index 0e06a9e43..1ff9f36e1 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf374..f3531ea74 100644 +index c3cdbf3746..be8f7d586c 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60554,9 +57034,16 @@ index c3cdbf374..f3531ea74 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f..589209447 100644 +index d33bd411f1..d96d5e29c0 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60567,9 +57054,16 @@ index d33bd411f..589209447 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b51..907e21371 100644 +index 992fa4b516..b09c873d05 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60580,9 +57074,16 @@ index 992fa4b51..907e21371 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528..c41c0b606 100644 +index 3e79b3528b..fe40ed6f98 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60593,9 +57094,16 @@ index 3e79b3528..c41c0b606 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224..b626da40d 100644 +index 6603dd224e..5163191049 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 @@ -60606,9 +57114,16 @@ index 6603dd224..b626da40d 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c9880..92cd84a2f 100644 +index d9a0c98800..d999ae423c 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60619,9 +57134,16 @@ index d9a0c9880..92cd84a2f 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95ae..e72345799 100644 +index 722db95aed..65912a5c7f 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60632,9 +57154,16 @@ index 722db95ae..e72345799 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb..f4f67cf52 100644 +index f58f21feb2..fffdc2f756 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60645,7 +57174,7 @@ index f58f21feb..f4f67cf52 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d..c5e38ba72 100644 +index acb86812d2..2c866f26f0 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -24,7 +24,7 @@ @@ -60653,12 +57182,12 @@ index acb86812d..c5e38ba72 100644 /* @test * @bug 8167409 - * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ package compiler.runtime.criticalnatives.argumentcorruption; diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f931..4437367b6 100644 +index eab36f9311..1da369fde2 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java @@ -24,7 +24,7 @@ @@ -60666,14 +57195,21 @@ index eab36f931..4437367b6 100644 /* @test * @bug 8167408 - * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp */ package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb..284b51019 100644 +index 7774dabcb5..7afe3560f3 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ 
b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -61,15 +61,17 @@ public class IntrinsicPredicates { public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE @@ -60716,17 +57252,17 @@ index 7774dabcb..284b51019 100644 public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a..16c199e37 100644 +index 57256aa5a3..d4d43b01ae 100644 --- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { +@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace { // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. @@ -60736,17 +57272,17 @@ index 57256aa5a..16c199e37 100644 } output.reportDiagnosticSummary(); diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abc..46be4dc98 100644 +index 127bb6abcd..eab19273ad 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +240,7 @@ public class ReservedStackTest { +@@ -239,7 +239,7 @@ public class ReservedStackTest { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || @@ -60755,95 +57291,46 @@ index 127bb6abc..46be4dc98 100644 Platform.isOSX() || Platform.isSolaris(); } -diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh -index 0c300d4fd..7f3698c47 100644 ---- a/test/hotspot/jtreg/test_env.sh -+++ b/test/hotspot/jtreg/test_env.sh -@@ -185,6 +185,11 @@ if [ $? = 0 ] - then - VM_CPU="arm" - fi -+grep "riscv64" vm_version.out > ${NULL} -+if [ $? 
= 0 ] -+then -+ VM_CPU="riscv64" -+fi - grep "ppc" vm_version.out > ${NULL} - if [ $? = 0 ] - then diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b..73e92855d 100644 +index 126a43a900..feb4de5388 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -45,7 +46,7 @@ import java.util.Set; +@@ -45,7 +45,7 @@ import java.util.Set; */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -index cb3348a0f..bc0d1a743 100644 ---- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -@@ -63,13 +63,13 @@ public class thrinfo001 { - try { - t_a.join(); - } catch (InterruptedException e) {} -+ checkInfo(t_a, t_a.getThreadGroup(), 1); - - thrinfo001b t_b = new thrinfo001b(); - t_b.setPriority(Thread.MIN_PRIORITY); - t_b.setDaemon(true); - checkInfo(t_b, t_b.getThreadGroup(), 2); - t_b.start(); -- checkInfo(t_b, t_b.getThreadGroup(), 2); - try { - t_b.join(); - } catch (InterruptedException e) {} diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1..bb8c79cdd 100644 +index 7990c49a1f..abeff80e5e 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +55,8 @@ public class TestCPUInformation { +@@ -54,8 +54,8 @@ public class TestCPUInformation { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c..a9cd63db9 100644 +index 6269373c2b..e1511772e7 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -202,6 +203,10 @@ public class Platform { +@@ -205,6 +205,10 @@ public class Platform { return isArch("arm.*"); } @@ -60854,233 +57341,3 @@ index f4ee0546c..a9cd63db9 100644 public static boolean isPPC() { return isArch("ppc.*"); } -diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -new file mode 100644 -index 000000000..6852c0540 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -@@ -0,0 +1,221 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ */ -+package org.openjdk.bench.java.lang; -+ -+import java.util.Random; -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.State; -+ -+import java.util.concurrent.TimeUnit; -+ -+/** -+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of -+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. -+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string -+ */ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@State(Scope.Thread) -+public class IndexOfBenchmark { -+ private static final int loops = 100000; -+ private static final Random rng = new Random(1999); -+ private static final int pathCnt = 1000; -+ private static final String [] latn1_short = new String[pathCnt]; -+ private static final String [] latn1_sse4 = new String[pathCnt]; -+ private static final String [] latn1_avx2 = new String[pathCnt]; -+ private static final String [] latn1_mixedLength = new String[pathCnt]; -+ private static final String [] utf16_short = new String[pathCnt]; -+ private static final String [] utf16_sse4 = new String[pathCnt]; -+ private static final String [] utf16_avx2 = new String[pathCnt]; -+ private static final String [] utf16_mixedLength = new String[pathCnt]; -+ static { -+ for (int i = 0; i < pathCnt; i++) { -+ latn1_short[i] = makeRndString(false, 15); -+ latn1_sse4[i] = makeRndString(false, 16); -+ latn1_avx2[i] = makeRndString(false, 32); -+ utf16_short[i] = makeRndString(true, 7); -+ utf16_sse4[i] = makeRndString(true, 8); -+ utf16_avx2[i] = makeRndString(true, 16); -+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); -+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); -+ } -+ } -+ -+ private static String makeRndString(boolean isUtf16, int length) { -+ StringBuilder sb = new StringBuilder(length); -+ if(length > 0){ -+ sb.append(isUtf16?'☺':'b'); -+ -+ for (int i = 1; i < length-1; i++) { -+ sb.append((char)('b' + rng.nextInt(26))); -+ } -+ -+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string -+ } -+ return sb.toString(); -+ } -+ -+ -+ @Benchmark -+ public static void latin1_mixed_char() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_char() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_mixed_String() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_String() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ ////////// more detailed code path dependent tests ////////// -+ -+ @Benchmark -+ public static void latin1_Short_char() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_SSE4_char() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_AVX2_char() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark 
-+ public static int utf16_Short_char() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_char() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_char() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_Short_String() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_SSE4_String() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_AVX2_String() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_Short_String() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_String() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_String() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+} --- -2.40.0.windows.1 - diff --git a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch index e395c0b7210708ddc2baa7dbc7fb96600f5e8cf5..8d4548aad36df00f937ee2babb039206bb059a35 100755 --- a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +++ b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch @@ -367,4 +367,4 @@ index 000000000..85b49171c --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ -+11.0.22.0.13 ++11.0.24.0.13 diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch index e26615bbde0e0da8cb84cd049b290cd8b63f1919..9c2bb8354556a1dcd1cbf01de61e1125db177f12 100644 --- a/LoongArch64-support.patch +++ b/LoongArch64-support.patch @@ -27,7 +27,7 @@ index 46fb9b4219..c6d8b24fc4 100644 org.graalvm.compiler.asm.sparc.test \ org.graalvm.compiler.asm.test \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2c..22f479120b 100644 +index 9bb34363e5..c95a2447ef 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" @@ -43,7 +43,7 @@ index a3e1e00b2c..22f479120b 100644 ############################################################################### # Check if the specified JVM variant should be built. To be used in shell if # constructs, like this: -@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -340,6 +346,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], HOTSPOT_TARGET_CPU_ARCH=arm fi @@ -70,7 +70,7 @@ index a3e1e00b2c..22f479120b 100644 # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) -@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -424,10 +450,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], JVM_FEATURES_jvmci="" INCLUDE_JVMCI="false" else @@ -85,7 +85,7 @@ index a3e1e00b2c..22f479120b 100644 JVM_FEATURES_jvmci="jvmci" INCLUDE_JVMCI="true" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5fc..a7aa4f4aaa 100644 +index 5d1d9efa39..815180ea96 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -23,6 +23,12 @@ @@ -141,10 +141,10 @@ index fdd2c0ca3d..318191233a 100644 assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -index cebc1e410d..816226c068 100644 +index 18b3ea147d..f3398e191c 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -262,18 +262,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } @@ -1293,10 +1293,10 @@ index 0000000000..e6e62cccad +} diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp new file mode 100644 -index 0000000000..7d9d24f2cd +index 0000000000..179da7bd0e --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp -@@ -0,0 +1,2811 @@ +@@ -0,0 +1,2827 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -2613,6 +2613,15 @@ index 0000000000..7d9d24f2cd + static int high6 (int x) { return high(x, 6); } + + ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ + protected: + // help methods for instruction ejection + @@ -3326,18 +3335,25 @@ index 0000000000..7d9d24f2cd + void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } + void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } + -+ // Now Membar_mask_bits is 0,Need to fix it after LA6000 + typedef enum { -+ StoreStore = 0, -+ LoadStore = 0, -+ StoreLoad = 0, -+ LoadLoad = 0, -+ AnyAny = 0 ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), + } Membar_mask_bits; + + // Serializes memory and blows flags + void membar(Membar_mask_bits hint) { -+ dbar(hint); ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); + } + + // LSX and LASX @@ -8729,13 +8745,13 @@ index 0000000000..c989e25c3a +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..72a80f37c4 +index 0000000000..6cb77f3fbe --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp -@@ -0,0 +1,1396 @@ +@@ -0,0 +1,1398 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -8891,8 +8907,10 @@ index 0000000000..72a80f37c4 + if (index->is_register()) { + // apply the shift and accumulate the displacement + if (shift > 0) { -+ LIR_Opr tmp = new_pointer_register(); -+ __ shift_left(index, shift, tmp); ++ // Use long register to avoid overflow when shifting large index values left. ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ __ shift_left(tmp, shift, tmp); + index = tmp; + } + if (large_disp != 0) { @@ -12553,7 +12571,7 @@ index 0000000000..04359bc172 +#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp new file mode 100644 -index 0000000000..9b4f3b88d4 +index 0000000000..6f6d34e026 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp @@ -0,0 +1,690 @@ @@ -13102,7 +13120,7 @@ index 0000000000..9b4f3b88d4 + + // first the method + -+ Method* m = *interpreter_frame_method_addr(); ++ Method* m = safe_interpreter_frame_method(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; @@ -14645,13 +14663,13 @@ index 0000000000..a7ebbfaabb +#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp new file mode 100644 -index 0000000000..749d3a3f79 +index 0000000000..d09e9a75a7 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp -@@ -0,0 +1,142 @@ +@@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14710,7 +14728,7 @@ index 0000000000..749d3a3f79 + + __ beq(count, R0, L_done); // zero count - nothing to do + -+ if (UseConcMarkSweepGC) __ membar(__ StoreStore); ++ if (ct->scanned_concurrently()) __ membar(__ StoreStore); + + __ li(tmp, disp); + @@ -14753,8 +14771,6 @@ index 0000000000..749d3a3f79 + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { -+ Untested("Untested"); -+ __ warn("store_check Untested"); + Label L_already_dirty; + __ membar(__ StoreLoad); + __ ld_b(AT, tmp, 0); @@ -14764,7 +14780,7 @@ index 0000000000..749d3a3f79 + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { -+ __ membar(Assembler::StoreLoad); ++ __ membar(Assembler::StoreStore); + } + __ st_b(R0, tmp, 0); + } @@ -18663,13 +18679,13 @@ index 0000000000..80dff0c762 + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 -index 0000000000..a5fb5f7b85 +index 0000000000..cc3824a402 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad -@@ -0,0 +1,13906 @@ +@@ -0,0 +1,13917 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it @@ -28730,14 +28746,25 @@ index 0000000000..a5fb5f7b85 +%} + +// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); -+ format %{ "StoreCM MEMBAR loadstore\n\t" -+ "st_b $mem, zero\t! CMS card-mark imm0" %} ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ -+ __ membar(__ StoreStore); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); @@ -32575,13 +32602,13 @@ index 0000000000..a5fb5f7b85 + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp new file mode 100644 -index 0000000000..5d0c8c45fb +index 0000000000..9720fd176d --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp -@@ -0,0 +1,4563 @@ +@@ -0,0 +1,4567 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -34407,7 +34434,7 @@ index 0000000000..5d0c8c45fb + + bind(fail); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); @@ -34430,7 +34457,7 @@ index 0000000000..5d0c8c45fb + + bind(neq); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) @@ -34455,7 +34482,7 @@ index 0000000000..5d0c8c45fb + + bind(fail); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); @@ -34480,7 +34507,7 @@ index 0000000000..5d0c8c45fb + + bind(neq); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) @@ -34858,7 +34885,7 @@ index 0000000000..5d0c8c45fb + move(AT, R0); + bnez(scrReg, DONE_SET); + -+ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); + st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + li(resReg, 1); + b(DONE); @@ -37063,10 +37090,14 @@ index 0000000000..5d0c8c45fb + address last = code()->last_insn(); + if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { + code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); + block_comment("merged membar"); + } else { + code()->set_last_insn(pc()); -+ dbar(hint); ++ Assembler::membar(hint); + } +} + @@ -38015,13 +38046,13 @@ index 0000000000..49302590c3 +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 -index 0000000000..3ed4c36651 +index 0000000000..6e27a69747 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp -@@ -0,0 +1,1625 @@ +@@ -0,0 +1,1626 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) -+ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -38920,7 +38951,7 @@ index 0000000000..3ed4c36651 + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { -+ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ xvfmul_d(v28, v18, v6); // f[0,3] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -39149,6 +39180,7 @@ index 0000000000..3ed4c36651 + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); @@ -40801,10 +40833,10 @@ index 0000000000..9234befae3 +} diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp new file mode 100644 -index 0000000000..195a2df580 +index 0000000000..a6e9d4dd3c --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp -@@ -0,0 +1,521 @@ +@@ -0,0 +1,528 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -41325,6 +41357,13 @@ index 0000000000..195a2df580 + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ +#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp new file mode 100644 @@ -42441,7 +42480,7 @@ index 0000000000..334c783b37 +} diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp new file mode 100644 -index 0000000000..736ed0a85f +index 0000000000..bc91ee005e --- /dev/null +++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp @@ -0,0 +1,3621 @@ @@ -42760,9 +42799,9 @@ index 0000000000..736ed0a85f +} + +// Is vector's size (in bytes) bigger than a size saved by default? -+// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++// 8 bytes registers are saved by default using fld/fst instructions. +bool SharedRuntime::is_wide_vector(int size) { -+ return size > 16; ++ return size > 8; +} + +size_t SharedRuntime::trampoline_size() { @@ -46068,7 +46107,7 @@ index 0000000000..736ed0a85f +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..0549c3c58f +index 0000000000..7f73863b2e --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp @@ -0,0 +1,4804 @@ @@ -46781,8 +46820,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy + void generate_disjoint_large_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + @@ -46862,8 +46901,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy lsx + void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + @@ -46944,8 +46983,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy lasx + void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + @@ -47026,8 +47065,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy + void generate_conjoint_large_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + @@ -47104,8 +47143,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy lsx + void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + @@ -47183,8 +47222,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy 
lasx + void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + @@ -47262,8 +47301,8 @@ index 0000000000..0549c3c58f + + // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. + void generate_byte_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -47628,8 +47667,8 @@ index 0000000000..0549c3c58f + // + address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) @@ -47668,8 +47707,8 @@ index 0000000000..0549c3c58f + // + address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); @@ -47695,8 +47734,8 @@ index 0000000000..0549c3c58f + + // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. + void generate_short_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -47907,8 +47946,8 @@ index 0000000000..0549c3c58f + // + address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) @@ -47947,8 +47986,8 @@ index 0000000000..0549c3c58f + // + address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); @@ -47974,8 +48013,8 @@ index 0000000000..0549c3c58f + + // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
+ void generate_int_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -48211,8 +48250,8 @@ index 0000000000..0549c3c58f + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, @@ -48239,8 +48278,8 @@ index 0000000000..0549c3c58f + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { @@ -48257,8 +48296,8 @@ index 0000000000..0549c3c58f + + // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. + void generate_long_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -48361,8 +48400,8 @@ index 0000000000..0549c3c58f + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, @@ -48389,8 +48428,8 @@ index 0000000000..0549c3c58f + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { @@ -53459,10 +53498,10 @@ index 0000000000..ddb38faf44 +#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp new file mode 100644 -index 0000000000..8ad7c5f76e +index 0000000000..673032218f --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp -@@ -0,0 +1,4147 @@ +@@ -0,0 +1,4113 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -55707,38 +55746,6 @@ index 0000000000..8ad7c5f76e + __ jr(T4); +} + -+// ---------------------------------------------------------------------------- -+// Volatile variables demand their effects be made known to all CPU's -+// in order. Store buffers on most chips allow reads & writes to -+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode -+// without some kind of memory barrier (i.e., it's not sufficient that -+// the interpreter does not reorder volatile references, the hardware -+// also must not reorder them). 
-+// -+// According to the new Java Memory Model (JMM): -+// (1) All volatiles are serialized wrt to each other. ALSO reads & -+// writes act as aquire & release, so: -+// (2) A read cannot let unrelated NON-volatile memory refs that -+// happen after the read float up to before the read. It's OK for -+// non-volatile memory refs that happen before the volatile read to -+// float down below it. -+// (3) Similar a volatile write cannot let unrelated NON-volatile -+// memory refs that happen BEFORE the write float down to after the -+// write. It's OK for non-volatile memory refs that happen after the -+// volatile write to float up before it. -+// -+// We only put in barriers around volatile refs (they are expensive), -+// not _between_ memory refs (that would require us to track the -+// flavor of the previous memory refs). Requirements (2) and (3) -+// require some barriers before volatile stores and after volatile -+// loads. These nearly cover requirement (1) but miss the -+// volatile-store-volatile-load case. This final case is placed after -+// volatile-stores although it could just as well go before -+// volatile-loads. -+void TemplateTable::volatile_barrier() { -+ if(os::is_MP()) __ membar(__ StoreLoad); -+} -+ +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in @@ -55934,7 +55941,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56080,7 +56087,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -56196,7 +56203,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + @@ -56368,7 +56375,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} @@ -56477,7 +56484,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + @@ -56526,7 +56533,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} @@ -56577,7 +56584,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56621,7 +56628,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -56651,7 +56658,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56676,7 +56683,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ 
beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -57008,7 +57015,6 @@ index 0000000000..8ad7c5f76e + + __ bind(no_such_method); + // throw exception -+ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. @@ -57022,7 +57028,6 @@ index 0000000000..8ad7c5f76e + + __ bind(no_such_interface); + // throw exception -+ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. @@ -57830,7 +57835,7 @@ index 0000000000..1a93123134 +#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp new file mode 100644 -index 0000000000..0a9b55d17e +index 0000000000..9115135166 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp @@ -0,0 +1,397 @@ @@ -57995,7 +58000,7 @@ index 0000000000..0a9b55d17e + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { -+ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); + } + + if (supports_lsx()) { @@ -64386,13 +64391,13 @@ index 0000000000..b97ecbcca5 +#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp new file mode 100644 -index 0000000000..cb1d53db0a +index 0000000000..f33165334c --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -64451,7 +64456,7 @@ index 0000000000..cb1d53db0a + __ beq(count, R0, L_done); // zero count - nothing to do + __ delayed()->nop(); + -+ if (UseConcMarkSweepGC) __ sync(); ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); + + __ set64(tmp, disp); + @@ -64500,8 +64505,6 @@ index 0000000000..cb1d53db0a + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { -+ Untested("Untested"); -+ __ warn("store_check Untested"); + Label L_already_dirty; + __ membar(Assembler::StoreLoad); + __ lb(AT, tmp, 0); @@ -64512,7 +64515,7 @@ index 0000000000..cb1d53db0a + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { -+ __ membar(Assembler::StoreLoad); ++ __ membar(Assembler::StoreStore); + } + __ sb(R0, tmp, 0); + } @@ -104399,7 +104402,7 @@ index 0000000000..75c23e8088 + return icache_line_size; +} diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2..f570946090 100644 +index 243cde8d74..124efbfb1b 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { @@ -104424,10 +104427,10 @@ index 847f7d61d2..f570946090 100644 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -index d34ea45c0b..f6b6dbdee3 100644 +index 8bb8c441b2..32384c6491 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -275,21 +275,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } @@ -104518,10 +104521,10 @@ index 897be2209e..0c27cc20f3 100644 CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -index ae297ac635..c786803e0f 100644 +index 86eb2fe88c..114aacaade 100644 --- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -215,16 +215,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } @@ -104607,10 +104610,10 @@ index e503159eb7..2e5609fec8 100644 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "unused on this code path"); diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -index a09a159722..a02ffafc77 100644 +index b324a3dbd8..0b7cb52dcd 100644 --- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -269,19 +269,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } @@ -104790,7 +104793,7 @@ index 3687754e71..791e4ed43f 100644 void generate_c1_load_barrier_stub(LIR_Assembler* ce, 
ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e7..2b8ac3dd2a 100644 +index 2842a11f92..4f58ec4be3 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ @@ -104806,7 +104809,7 @@ index 74945999e7..2b8ac3dd2a 100644 // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -3966,6 +3972,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,6 +4066,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -104847,13 +104850,13 @@ index 0000000000..30719a0340 + */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp new file mode 100644 -index 0000000000..86f8c963f5 +index 0000000000..8403e7838a --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -104972,7 +104975,7 @@ index 0000000000..86f8c963f5 + " sc.w %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) @@ -105000,7 +105003,7 @@ index 0000000000..86f8c963f5 + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) @@ -105267,7 +105270,7 @@ index 0000000000..ebd73af0c5 + diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp new file mode 100644 -index 0000000000..295d20e19e +index 0000000000..5429a1055a --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp @@ -0,0 +1,51 @@ @@ -105304,19 +105307,19 @@ index 0000000000..295d20e19e +// Included in orderAccess.hpp header file. + +// Implementation of class OrderAccess. 
-+#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ -+ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); + -+inline void OrderAccess::loadload() { inlasm_sync(); } -+inline void OrderAccess::storestore() { inlasm_sync(); } -+inline void OrderAccess::loadstore() { inlasm_sync(); } -+inline void OrderAccess::storeload() { inlasm_sync(); } ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } + -+inline void OrderAccess::acquire() { inlasm_sync(); } -+inline void OrderAccess::release() { inlasm_sync(); } -+inline void OrderAccess::fence() { inlasm_sync(); } ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } + + +#undef inlasm_sync @@ -109083,10 +109086,10 @@ index 44a5bcbe54..114b155f92 100644 void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp -index f4b156d59b..fc35f02772 100644 +index 88f6d30697..1d5a6668ea 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp -@@ -479,13 +479,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, +@@ -480,13 +480,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { CodeStub* stub = new RangeCheckStub(range_check_info, index, array); if (index->is_constant()) { @@ -109104,7 +109107,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -493,12 +491,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, +@@ -494,12 +492,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index); if (index->is_constant()) { @@ -109121,7 +109124,7 @@ index f4b156d59b..fc35f02772 100644 } __ move(index, result); } -@@ -934,7 +931,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { +@@ -935,7 +932,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { return tmp; } @@ -109130,7 +109133,7 @@ index f4b156d59b..fc35f02772 100644 if (if_instr->should_profile()) { ciMethod* method = if_instr->profiled_method(); assert(method != NULL, "method should be set if branch is profiled"); -@@ -955,10 +952,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { +@@ -956,10 +953,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ metadata2reg(md->constant_encoding(), md_reg); LIR_Opr data_offset_reg = new_pointer_register(); @@ -109152,7 +109155,7 @@ index f4b156d59b..fc35f02772 100644 // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
LIR_Opr data_reg = new_pointer_register(); -@@ -1315,8 +1319,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { +@@ -1316,8 +1320,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { } __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); @@ -109163,7 +109166,7 @@ index f4b156d59b..fc35f02772 100644 } -@@ -1598,8 +1602,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { +@@ -1599,8 +1603,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { if (GenerateRangeChecks && needs_range_check) { if (use_length) { @@ -109174,7 +109177,7 @@ index f4b156d59b..fc35f02772 100644 } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // range_check also does the null check -@@ -1777,12 +1781,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { +@@ -1778,12 +1782,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeEmitInfo* info = state_for(x); CodeStub* stub = new RangeCheckStub(info, index.result()); if (index.result()->is_constant()) { @@ -109189,7 +109192,7 @@ index f4b156d59b..fc35f02772 100644 } __ move(index.result(), result); } else { -@@ -1860,8 +1861,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { +@@ -1861,8 +1862,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } else if (use_length) { // TODO: use a (modified) version of array_range_check that does not require a // constant length to be loaded to a register @@ -109200,7 +109203,7 @@ index f4b156d59b..fc35f02772 100644 } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // The range check performs the null check, so clear it out for the load -@@ -2234,19 +2235,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi +@@ -2235,19 +2236,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); if (low_key == high_key) { @@ -109225,7 +109228,7 @@ index f4b156d59b..fc35f02772 100644 __ branch_destination(L->label()); } } -@@ -2346,12 +2342,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { +@@ -2347,12 +2343,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); @@ -109242,7 +109245,7 @@ index f4b156d59b..fc35f02772 100644 } LIR_Opr data_reg = new_pointer_register(); -@@ -2365,8 +2360,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { +@@ -2366,8 +2361,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); } else { for (int i = 0; i < len; i++) { @@ -109252,7 +109255,7 @@ index f4b156d59b..fc35f02772 100644 } __ jump(x->default_sux()); } -@@ -2404,12 +2398,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { +@@ -2405,12 +2399,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); @@ -109269,7 +109272,7 @@ index f4b156d59b..fc35f02772 100644 } LIR_Opr data_reg = new_pointer_register(); -@@ -2424,8 +2417,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { +@@ -2425,8 +2418,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { } else { int len = 
x->length(); for (int i = 0; i < len; i++) { @@ -109279,7 +109282,7 @@ index f4b156d59b..fc35f02772 100644 } __ jump(x->default_sux()); } -@@ -2935,8 +2927,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { +@@ -2936,8 +2928,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { f_val.dont_load_item(); LIR_Opr reg = rlock_result(x); @@ -109290,7 +109293,7 @@ index f4b156d59b..fc35f02772 100644 } #ifdef JFR_HAVE_INTRINSICS -@@ -2980,8 +2972,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { +@@ -2981,8 +2973,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { __ move(LIR_OprFact::oopConst(NULL), result); LIR_Opr jobj = new_register(T_METADATA); __ move_wide(jobj_addr, jobj); @@ -109300,7 +109303,7 @@ index f4b156d59b..fc35f02772 100644 access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); -@@ -3286,21 +3277,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { +@@ -3287,21 +3278,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { if (compilation()->count_backedges()) { @@ -109331,7 +109334,7 @@ index f4b156d59b..fc35f02772 100644 increment_backedge_counter(info, step, bci); } } -@@ -3339,8 +3333,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { +@@ -3340,8 +3334,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { // DeoptimizeStub will reexecute from the current state in code info. CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, Deoptimization::Action_make_not_entrant); @@ -109341,7 +109344,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -3386,8 +3379,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, +@@ -3387,8 +3380,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, int freq = frequency << InvocationCounter::count_shift; if (freq == 0) { if (!step->is_constant()) { @@ -109351,7 +109354,7 @@ index f4b156d59b..fc35f02772 100644 } else { __ branch(lir_cond_always, T_ILLEGAL, overflow); } -@@ -3395,12 +3387,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, +@@ -3396,12 +3388,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr mask = load_immediate(freq, T_INT); if (!step->is_constant()) { // If step is 0, make sure the overflow check below always fails @@ -109367,7 +109370,7 @@ index f4b156d59b..fc35f02772 100644 } __ branch_destination(overflow->continuation()); } -@@ -3513,8 +3504,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { +@@ -3514,8 +3505,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { CodeEmitInfo *info = state_for(x, x->state()); CodeStub* stub = new PredicateFailedStub(info); @@ -109377,7 +109380,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -3661,8 +3651,8 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& +@@ -3662,8 +3652,8 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); int diffbit = Klass::layout_helper_boolean_diffbit(); __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); @@ -109415,7 +109418,7 @@ index 3ad325d759..f377b27859 100644 ciMethod *method, LIR_Opr step, int frequency, int bci, bool backedge, bool notify); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp 
b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd99..4e7df88102 100644 +index acc969ac9c..1637965613 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -35,6 +35,12 @@ @@ -109455,7 +109458,7 @@ index c28055fd99..4e7df88102 100644 default: break; } -@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { +@@ -3350,7 +3373,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); @@ -109466,7 +109469,7 @@ index c28055fd99..4e7df88102 100644 // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. check_live = false; -@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi +@@ -6200,6 +6225,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; @@ -109483,7 +109486,7 @@ index c28055fd99..4e7df88102 100644 if (branch->block() == target_from) { branch->change_block(target_to); } -@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6328,6 +6363,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { } } } @@ -109504,7 +109507,7 @@ index c28055fd99..4e7df88102 100644 } } } -@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { +@@ -6403,6 +6452,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); } @@ -109518,7 +109521,7 @@ index c28055fd99..4e7df88102 100644 } for (j = 0; j < block->number_of_sux() - 1; j++) { -@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { +@@ -6647,6 +6703,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } @@ -109544,7 +109547,7 @@ index c28055fd99..4e7df88102 100644 case lir_add: case lir_sub: diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp -index 747971af41..093831ac09 100644 +index 6bc63116bd..41c0a0ea3d 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -22,6 +22,12 @@ @@ -109560,7 +109563,7 @@ index 747971af41..093831ac09 100644 #include "precompiled.hpp" #include "jvm.h" #include "code/codeCache.hpp" -@@ -2155,7 +2161,8 @@ void nmethod::verify_scopes() { +@@ -2159,7 +2165,8 @@ void nmethod::verify_scopes() { //verify_interrupt_point(iter.addr()); break; case relocInfo::runtime_call_type: @@ -109570,7 +109573,7 @@ index 747971af41..093831ac09 100644 address destination = iter.reloc()->value(); // Right now there is no way to find out which entries support // an interrupt point. It would be nice if we had this -@@ -2392,7 +2399,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { +@@ -2396,7 +2403,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { return st.as_string(); } case relocInfo::runtime_call_type: @@ -109666,7 +109669,7 @@ index 57931a1a6a..fb56fd3ab1 100644 // Trampoline Relocations. 
// A trampoline allows to encode a small branch in the code, even if there diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp -index 7892cc85b3..3637aefe10 100644 +index 3c986f40ff..23d07f0505 100644 --- a/src/hotspot/share/code/vtableStubs.cpp +++ b/src/hotspot/share/code/vtableStubs.cpp @@ -22,6 +22,12 @@ @@ -109954,7 +109957,7 @@ index 9f8ce74243..3c1862d826 100644 } diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp -index 24e4c98175..a6b310290d 100644 +index 6483159136..f40e304f9a 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -22,6 +22,12 @@ @@ -109970,7 +109973,7 @@ index 24e4c98175..a6b310290d 100644 #include "precompiled.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/systemDictionary.hpp" -@@ -1506,7 +1512,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth +@@ -1497,7 +1503,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth // preparing the same method will be sure to see non-null entry & mirror. IRT_END @@ -110114,10 +110117,10 @@ index 8927063330..b5bb5c2887 100644 #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp -index 80958b0469..08d13a4189 100644 +index c3a884fafe..103789d9b1 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp -@@ -1082,12 +1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1083,12 +1083,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; @@ -110132,7 +110135,7 @@ index 80958b0469..08d13a4189 100644 ReservedSpace metaspace_rs; -@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1114,7 +1114,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // below 32g to get a zerobased CCS. For simplicity we reuse the search // strategy for AARCH64. @@ -110142,7 +110145,7 @@ index 80958b0469..08d13a4189 100644 for (char *a = align_up(requested_addr, increment); a < (char*)(1024*G); a += increment) { -@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1145,7 +1146,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } @@ -110300,8 +110303,43 @@ index 84123b29ec..77fbacf2d8 100644 #include "services/memTracker.hpp" #include "utilities/dtrace.hpp" #include "utilities/globalDefinitions.hpp" +diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp +index ce23aafa8f..d3dfb74d5b 100644 +--- a/src/hotspot/share/runtime/objectMonitor.cpp ++++ b/src/hotspot/share/runtime/objectMonitor.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/vmSymbols.hpp" + #include "jfr/jfrEvents.hpp" +@@ -308,6 +314,9 @@ void ObjectMonitor::enter(TRAPS) { + } + + assert(_owner != Self, "invariant"); ++ // Thread _succ != current assertion load reording before Thread if (_succ == current) _succ = nullptr. ++ // But expect order is firstly if (_succ == current) _succ = nullptr then _succ != current assertion. ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(Self->is_Java_thread(), "invariant"); + JavaThread * jt = (JavaThread *) Self; +@@ -469,6 +478,7 @@ void ObjectMonitor::EnterI(TRAPS) { + } + + // The Spin failed -- Enqueue and park the thread ... ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(_owner != Self, "invariant"); + assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index e0f4a2af1f..09cc4b1ba5 100644 +index 1c540bb621..0e44240d40 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -110381,7 +110419,7 @@ index c758fc5743..a8c4638f6a 100644 inline bool is_even(intx x) { return !is_odd(x); } diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf80253868..f611daf36d 100644 +index 6605ab367c..5a2be6ef15 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -22,6 +22,12 @@ @@ -110397,7 +110435,7 @@ index cf80253868..f611daf36d 100644 #ifndef SHARE_VM_UTILITIES_MACROS_HPP #define SHARE_VM_UTILITIES_MACROS_HPP -@@ -531,6 +537,38 @@ +@@ -535,6 +541,38 @@ #define NOT_SPARC(code) code #endif @@ -110436,7 +110474,7 @@ index cf80253868..f611daf36d 100644 #if defined(PPC32) || defined(PPC64) #ifndef PPC #define PPC -@@ -623,16 +661,34 @@ +@@ -627,16 +665,34 @@ // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp // // basename.hpp / basename.inline.hpp @@ -110635,7 +110673,7 @@ index 8318e8e021..07064e76ee 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859..eefe55959c 100644 +index c22b5d1cb3..36d6343960 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -110651,12 +110689,12 @@ index de5254d859..eefe55959c 100644 #include #include #include -@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif - --#ifdef PTRACE_GETREGS_REQ -+#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) +@@ -151,7 +157,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + return false; + } + return true; +-#elif defined(PTRACE_GETREGS_REQ) ++#elif defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); return false; @@ -116548,7 +116586,7 @@ index 127bb6abcd..c9277604ae 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b7..05aee6b84c 100644 +index 126a43a900..55bd135f6e 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -116588,35 +116626,8 @@ index 7990c49a1f..025048c6b0 100644 } } } -diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java -index 5d4ee095f7..de622b128b 100644 ---- a/test/jdk/sun/security/pkcs11/PKCS11Test.java -+++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java -@@ -21,6 +21,12 @@ - * questions. - */ - -+/* -+ * This file has been modified by Loongson Technology in 2022, These -+ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - // common infrastructure for SunPKCS11 tests - - import java.io.BufferedReader; -@@ -732,6 +738,9 @@ public abstract class PKCS11Test { - "/usr/lib64/" }); - osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); - osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); -+ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); -+ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", -+ "/usr/lib64/" }); - osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); - osMap.put("Windows-x86-32", new String[] {}); - osMap.put("Windows-amd64-64", new String[] {}); diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c7..a600d15b61 100644 +index 6269373c2b..440ec4664f 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -116631,8 +116642,8 @@ index f4ee0546c7..a600d15b61 100644 + package jdk.test.lib; - import java.io.FileNotFoundException; -@@ -226,6 +232,14 @@ public class Platform { + import java.io.BufferedReader; +@@ -229,6 +235,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } diff --git a/ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch b/ZGC-AArch64-Optimizations-and-Fixes.patch similarity index 76% rename from ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch rename to ZGC-AArch64-Optimizations-and-Fixes.patch index 58ed16d66c5739eb962ca2d04319a5ceef9cc576..0fa61a5978ddb46551c8d1fe7b0fe64cfb13d336 100644 --- a/ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch +++ b/ZGC-AArch64-Optimizations-and-Fixes.patch @@ -1,16 +1,108 @@ +From 1932790364789c601d463a4de8f757cf604344c0 Mon Sep 17 00:00:00 2001 + +--- + make/hotspot/gensrc/GensrcAdlc.gmk | 6 + + src/hotspot/cpu/aarch64/aarch64.ad | 207 +- + .../gc/z/zBarrierSetAssembler_aarch64.cpp | 246 ++- + .../gc/z/zBarrierSetAssembler_aarch64.hpp | 26 +- + src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad | 268 +++ + .../cpu/aarch64/macroAssembler_aarch64.cpp | 61 + + .../cpu/aarch64/macroAssembler_aarch64.hpp | 6 + + .../templateInterpreterGenerator_aarch64.cpp | 4 +- + .../cpu/aarch64/vm_version_aarch64.hpp | 8 + + .../cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp | 404 +++- + .../cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp | 30 +- + src/hotspot/cpu/x86/gc/z/z_x86_64.ad | 168 ++ + src/hotspot/cpu/x86/x86.ad | 136 +- + 
src/hotspot/cpu/x86/x86_64.ad | 437 +---- + .../gc/z/zBackingFile_linux_aarch64.cpp | 2 +- + src/hotspot/share/adlc/formssel.cpp | 8 - + src/hotspot/share/c1/c1_Instruction.cpp | 1 + + src/hotspot/share/ci/ciInstanceKlass.cpp | 44 + + src/hotspot/share/classfile/vmSymbols.hpp | 4 + + .../share/compiler/compilerDirectives.hpp | 3 +- + .../share/gc/shared/c2/barrierSetC2.cpp | 73 +- + .../share/gc/shared/c2/barrierSetC2.hpp | 15 +- + src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp | 1657 +++-------------- + src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp | 181 +- + .../share/gc/z/zBarrierSetAssembler.hpp | 5 +- + src/hotspot/share/gc/z/zGlobals.hpp | 7 +- + src/hotspot/share/gc/z/zHeap.cpp | 5 + + src/hotspot/share/gc/z/zLiveMap.cpp | 20 +- + src/hotspot/share/gc/z/zLiveMap.inline.hpp | 9 +- + src/hotspot/share/gc/z/zMarkStack.cpp | 74 +- + src/hotspot/share/gc/z/zMarkStack.hpp | 1 + + src/hotspot/share/gc/z/zWorkers.cpp | 23 +- + src/hotspot/share/gc/z/zWorkers.hpp | 2 - + src/hotspot/share/gc/z/z_globals.hpp | 6 +- + src/hotspot/share/opto/c2compiler.cpp | 1 + + src/hotspot/share/opto/classes.cpp | 3 - + src/hotspot/share/opto/classes.hpp | 11 - + src/hotspot/share/opto/compile.cpp | 52 +- + src/hotspot/share/opto/compile.hpp | 25 +- + src/hotspot/share/opto/escape.cpp | 15 - + src/hotspot/share/opto/graphKit.cpp | 10 +- + src/hotspot/share/opto/graphKit.hpp | 10 +- + src/hotspot/share/opto/lcm.cpp | 1 - + src/hotspot/share/opto/library_call.cpp | 17 + + src/hotspot/share/opto/loopnode.cpp | 1 - + src/hotspot/share/opto/loopopts.cpp | 3 - + src/hotspot/share/opto/machnode.hpp | 9 +- + src/hotspot/share/opto/matcher.cpp | 45 +- + src/hotspot/share/opto/memnode.cpp | 14 +- + src/hotspot/share/opto/memnode.hpp | 53 +- + src/hotspot/share/opto/node.cpp | 7 - + src/hotspot/share/opto/node.hpp | 6 - + src/hotspot/share/opto/output.cpp | 424 +++-- + src/hotspot/share/opto/output.hpp | 5 +- + src/hotspot/share/opto/parse1.cpp | 1 + + src/hotspot/share/opto/phaseX.cpp | 8 +- + src/hotspot/share/opto/vectornode.cpp | 1 - + src/hotspot/share/runtime/sharedRuntime.cpp | 2 + + src/hotspot/share/runtime/sharedRuntime.hpp | 5 + + src/hotspot/share/utilities/bitMap.hpp | 17 +- + src/hotspot/share/utilities/bitMap.inline.hpp | 34 +- + .../share/classes/java/util/Random.java | 2 + + .../runtime/MemberName/MemberNameLeak.java | 1 + + 63 files changed, 1941 insertions(+), 2989 deletions(-) + create mode 100644 src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad + create mode 100644 src/hotspot/cpu/x86/gc/z/z_x86_64.ad + +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index c5a3ac572..2af2f9ac4 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) + $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + ++ ifeq ($(call check-jvm-feature, zgc), true) ++ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \ ++ ))) ++ endif ++ + ifeq ($(call check-jvm-feature, shenandoahgc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index af3e593a9..05b36e279 100644 +index a8976d5d4..b253e823a 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ 
b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1131,6 +1131,7 @@ definitions %{ - source_hpp %{ - - #include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" - +@@ -1142,12 +1142,6 @@ definitions %{ + int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); %} -@@ -2501,17 +2502,7 @@ void Compile::reshape_address(AddPNode* addp) { +-source_hpp %{ +- +-#include "gc/z/c2/zBarrierSetC2.hpp" +- +-%} +- + //----------SOURCE BLOCK------------------------------------------------------- + // This is a block of C++ code which provides values, functions, and + // definitions necessary in the rest of the architecture description +@@ -2525,17 +2519,7 @@ void Compile::reshape_address(AddPNode* addp) { __ INSN(REG, as_Register(BASE)); \ } @@ -28,10 +120,10 @@ index af3e593a9..05b36e279 100644 +static Address mem2address(int opcode, Register base, int index, int size, int disp) { Address::extend scale; - -@@ -2409,13 +2409,18 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + +@@ -2554,13 +2538,18 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } - + if (index == -1) { - (masm.*insn)(reg, Address(base, disp)); + return Address(base, disp); @@ -41,7 +133,7 @@ index af3e593a9..05b36e279 100644 + return Address(base, as_Register(index), scale); } } - + +typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); +typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr); +typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); @@ -50,10 +142,10 @@ index af3e593a9..05b36e279 100644 static void loadStore(MacroAssembler masm, mem_insn insn, Register reg, int opcode, Register base, int index, int size, int disp, -@@ -2450,9 +2455,20 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, +@@ -2595,9 +2584,20 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } } - + + // Used for all non-volatile memory accesses. The use of + // $mem->opcode() to discover whether this pattern uses sign-extended + // offsets is something of a kludge. 
@@ -72,10 +164,10 @@ index af3e593a9..05b36e279 100644 + Register base, int index, int size, int disp) { Address::extend scale; - -@@ -2474,8 +2490,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + +@@ -2619,8 +2619,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } - + static void loadStore(MacroAssembler masm, mem_vector_insn insn, - FloatRegister reg, MacroAssembler::SIMD_RegVariant T, - int opcode, Register base, int index, int size, int disp) @@ -84,7 +176,7 @@ index af3e593a9..05b36e279 100644 { if (index == -1) { (masm.*insn)(reg, T, Address(base, disp)); -@@ -3797,7 +3805,7 @@ frame %{ +@@ -3921,7 +3921,7 @@ frame %{ static const int hi[Op_RegL + 1] = { // enum name 0, // Op_Node 0, // Op_Set @@ -93,7 +185,7 @@ index af3e593a9..05b36e279 100644 OptoReg::Bad, // Op_RegI R0_H_num, // Op_RegP OptoReg::Bad, // Op_RegF -@@ -6929,7 +6937,7 @@ instruct loadRange(iRegINoSp dst, memory mem) +@@ -7075,7 +7075,7 @@ instruct loadRange(iRegINoSp dst, memory mem) instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); @@ -102,7 +194,7 @@ index af3e593a9..05b36e279 100644 ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} -@@ -7622,6 +7630,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +@@ -7768,6 +7768,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) %{ match(Set dst (LoadP mem)); @@ -110,7 +202,7 @@ index af3e593a9..05b36e279 100644 ins_cost(VOLATILE_REF_COST); format %{ "ldar $dst, $mem\t# ptr" %} -@@ -8506,6 +8515,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS +@@ -8652,6 +8653,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); @@ -118,7 +210,7 @@ index af3e593a9..05b36e279 100644 ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); -@@ -8619,7 +8629,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL +@@ -8765,7 +8767,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -127,7 +219,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); -@@ -8750,6 +8760,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne +@@ -8896,6 +8898,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne %} instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -135,7 +227,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); -@@ -8849,7 +8860,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN +@@ -8995,7 +8998,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN %} instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -144,7 +236,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); -@@ -8950,6 +8961,7 @@ 
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne +@@ -9096,6 +9099,7 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne %} instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -152,7 +244,7 @@ index af3e593a9..05b36e279 100644 match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); -@@ -9057,8 +9069,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN +@@ -9203,8 +9207,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN %} instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -162,7 +254,7 @@ index af3e593a9..05b36e279 100644 ins_cost(VOLATILE_REF_COST); effect(KILL cr); format %{ -@@ -9108,6 +9120,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ +@@ -9254,6 +9258,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ %} instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{ @@ -170,7 +262,7 @@ index af3e593a9..05b36e279 100644 match(Set prev (GetAndSetP mem newv)); ins_cost(2 * VOLATILE_REF_COST); format %{ "atomic_xchg $prev, $newv, [$mem]" %} -@@ -9151,7 +9164,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ +@@ -9297,7 +9302,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ %} instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ @@ -179,30 +271,20 @@ index af3e593a9..05b36e279 100644 match(Set prev (GetAndSetP mem newv)); ins_cost(VOLATILE_REF_COST); format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -@@ -17477,145 +17490,238 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ - - source %{ +@@ -18518,150 +18523,6 @@ instruct vpopcount2I(vecD dst, vecD src) %{ + ins_pipe(pipe_class_default); + %} +-source %{ +- -#include "gc/z/zBarrierSetAssembler.hpp" -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ cbnz(tmp, *stub->entry()); -+ __ bind(*stub->continuation()); -+} - +- -static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, - Register base, int index, int scale, - int disp, bool weak) { - const address stub = weak ? 
ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst) - : ZBarrierSet::assembler()->load_barrier_slow_stub(dst); -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ b(*stub->entry()); -+ __ bind(*stub->continuation()); -+} - +- - if (index == -1) { - if (disp != 0) { - __ lea(dst, Address(base, disp)); @@ -216,53 +298,14 @@ index af3e593a9..05b36e279 100644 - } else { - __ lea(dst, Address(base, disp)); - __ lea(dst, Address(dst, index_reg, Address::lsl(scale))); -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); - } +- } - } -+ %} - +- - __ far_call(RuntimeAddress(stub)); -} -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); - -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(iload_reg_mem); - %} - +- +-%} +- -// -// Execute ZGC load barrier (strong) slow path -// @@ -288,28 +331,10 @@ index af3e593a9..05b36e279 100644 - ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $mem$$base$$Register, - $mem$$index, $mem$$scale, $mem$$disp, false); -+// Load Pointer Volatile -+instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "ldar $dst, $mem\t" %} -+ -+ ins_encode %{ -+ __ ldar($dst$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe(pipe_slow); -+ -+ ins_pipe(pipe_serial); - %} - +-%} +- -// -// Execute ZGC load barrier (weak) slow path -// @@ -335,83 +360,30 @@ index af3e593a9..05b36e279 100644 - ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $mem$$base$$Register, - $mem$$index, $mem$$scale, $mem$$disp, true); -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 
ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } - %} -+ - ins_pipe(pipe_slow); - %} - +- %} +- ins_pipe(pipe_slow); +-%} +- -// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed -// but doesn't affect output. -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); - +- -instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, - rFlagsReg cr) %{ - match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval))); - ins_cost(2 * VOLATILE_REF_COST); +- ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); - format %{ - "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" - %} -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ - ins_encode %{ +- ins_encode %{ - __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, - Assembler::xword, /*acquire*/ false, /*release*/ true, - /*weak*/ false, $res$$Register); -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } - %} -+ - ins_pipe(pipe_slow); - %} - +- %} +- ins_pipe(pipe_slow); +-%} +- -instruct z_compareAndSwapP(iRegINoSp res, - indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, @@ -419,16 +391,11 @@ index af3e593a9..05b36e279 100644 - - match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval 
newval))); -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); - - ins_cost(2 * VOLATILE_REF_COST); - +- +- ins_cost(2 * VOLATILE_REF_COST); +- - effect(KILL cr); -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} - +- - format %{ - "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" - "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -436,93 +403,25 @@ index af3e593a9..05b36e279 100644 - - ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), - aarch64_enc_cset_eq(res)); -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} - - ins_pipe(pipe_slow); - %} - -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} - +- +- ins_pipe(pipe_slow); +-%} +- +- -instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev, - iRegP keepalive) %{ - match(Set prev (ZGetAndSetP mem (Binary newv keepalive))); -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF prev, KILL cr); - - ins_cost(2 * VOLATILE_REF_COST); -+ - format %{ "atomic_xchg $prev, $newv, [$mem]" %} -+ - ins_encode %{ +- +- ins_cost(2 * VOLATILE_REF_COST); +- format %{ "atomic_xchg $prev, $newv, [$mem]" %} +- ins_encode %{ - __ 
atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } - %} - ins_pipe(pipe_serial); - %} +- %} +- ins_pipe(pipe_serial); +-%} + + //----------PEEPHOLE RULES----------------------------------------------------- + // These must follow all instruction definitions as they use the names diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp index 8e169ace4..787c0c1af 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -933,11 +832,285 @@ index 7e8be01cc..cca873825 100644 }; #endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP +diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +new file mode 100644 +index 000000000..50cc6f924 +--- /dev/null ++++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +@@ -0,0 +1,268 @@ ++// ++// Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++ ++source_hpp %{ ++ ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); ++ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ cbnz(tmp, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); ++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(4 * INSN_COST); ++ ++ format %{ "ldr $dst, $mem" %} ++ ++ ins_encode %{ ++ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ __ ldr($dst$$Register, ref_addr); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Weak Pointer ++instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(4 * INSN_COST); ++ ++ format %{ "ldr $dst, $mem" %} ++ ++ ins_encode %{ ++ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ __ ldr($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Pointer Volatile ++instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(VOLATILE_REF_COST); ++ ++ format %{ "ldar $dst, $mem\t" %} ++ ++ ins_encode %{ ++ __ ldar($dst$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval\n\t" ++ "cset $res, EQ" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak 
*/, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, rscratch2); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval\n\t" ++ "cset $res, EQ" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, rscratch2); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, $res$$Register); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, $res$$Register); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]" %} ++ ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} ++ ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ins_pipe(pipe_serial); ++%} ++ diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index d24ba97e1..08d39c4bd 100644 +index 7f329a45d..5ddf049ce 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2096,6 +2096,65 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { +@@ -2129,6 +2129,67 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { return count; } @@ -945,57 +1118,59 @@ index d24ba97e1..08d39c4bd 100644 +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -+ int words_pushed = 0; -+ + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; ++ int i = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } -+ regs[count++] = zr->encoding_nocheck(); -+ count &= ~1; // Only push an even number of regs + -+ // Always pushing full 128 bit registers. 
-+ if (count) { -+ stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -+ words_pushed += 2; ++ if (!count) { ++ return 0; ++ } ++ ++ add(stack, stack, -count * wordSize * 2); ++ ++ if (count & 1) { ++ strq(as_FloatRegister(regs[0]), Address(stack)); ++ i += 1; + } -+ for (int i = 2; i < count; i += 2) { ++ ++ for (; i < count; i += 2) { + stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -+ words_pushed += 2; + } + -+ assert(words_pushed == count, "oops, pushed != count"); + return count; +} + +int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -+ int words_pushed = 0; -+ + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; ++ int i = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } -+ regs[count++] = zr->encoding_nocheck(); -+ count &= ~1; + -+ for (int i = 2; i < count; i += 2) { -+ ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -+ words_pushed += 2; ++ if (!count) { ++ return 0; + } -+ if (count) { -+ ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -+ words_pushed += 2; ++ ++ if (count & 1) { ++ ldrq(as_FloatRegister(regs[0]), Address(stack)); ++ i += 1; ++ } ++ ++ for (; i < count; i += 2) { ++ ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); + } + -+ assert(words_pushed == count, "oops, pushed != count"); ++ add(stack, stack, count * wordSize * 2); + + return count; +} @@ -1004,30 +1179,30 @@ index d24ba97e1..08d39c4bd 100644 void MacroAssembler::verify_heapbase(const char* msg) { #if 0 diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index edcfd9ceb..60b728e94 100644 +index 01fdf16a0..073854d2b 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -462,12 +462,18 @@ private: +@@ -463,12 +463,18 @@ private: int push(unsigned int bitset, Register stack); int pop(unsigned int bitset, Register stack); - + + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + void mov(Register dst, Address a); - + public: void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } - + + void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } + void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } + static RegSet call_clobbered_registers(); - + // Push and pop everything that might be clobbered by a native diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp -index c162024db..6e4eb1a7a 100644 +index 21ba661ea..430f3ee14 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -880,8 +880,8 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { @@ -1041,11 +1216,43 @@ index c162024db..6e4eb1a7a 100644 } __ ldr(rcpool, Address(rmethod, Method::const_offset())); +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +index 
8c9676aed..e417f07be 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +@@ -75,6 +75,7 @@ public: + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', ++ CPU_HISILICON = 'H', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', +@@ -107,6 +108,13 @@ public: + static int cpu_variant() { return _variant; } + static int cpu_revision() { return _revision; } + ++ static bool is_hisi_enabled() { ++ if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03)) { ++ return true; ++ } ++ return false; ++ } ++ + static bool is_zva_enabled() { return 0 <= _zva_length; } + static int zva_length() { + assert(is_zva_enabled(), "ZVA not available"); diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index 381211ecc..d88ecf7b8 100644 +index f5de1ed88..4428e96bc 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -27,16 +27,16 @@ +@@ -24,21 +24,22 @@ + #include "precompiled.hpp" + #include "asm/macroAssembler.inline.hpp" + #include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" + #include "gc/z/zBarrier.inline.hpp" + #include "gc/z/zBarrierSet.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zBarrierSetRuntime.hpp" #include "memory/resourceArea.hpp" @@ -1066,7 +1273,7 @@ index 381211ecc..d88ecf7b8 100644 #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ -@@ -44,6 +44,9 @@ +@@ -46,6 +47,9 @@ #define BLOCK_COMMENT(str) __ block_comment(str) #endif @@ -1076,7 +1283,7 @@ index 381211ecc..d88ecf7b8 100644 static void call_vm(MacroAssembler* masm, address entry_point, Register arg0, -@@ -333,126 +336,326 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* +@@ -335,126 +339,326 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* #endif // COMPILER1 @@ -1451,113 +1658,287 @@ index 381211ecc..d88ecf7b8 100644 + } + } + } - } - -- __ ret(0); -- -- return start; --} -+ ~ZSetupArguments() { -+ // Transfer result -+ if (_ref != rax) { -+ __ movq(_ref, rax); + } + +- __ ret(0); +- +- return start; +-} ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != rax) { ++ __ movq(_ref, rax); ++ } ++ } ++}; + + #undef __ ++#define __ masm-> + +-void ZBarrierSetAssembler::barrier_stubs_init() { +- // Load barrier stubs +- int stub_code_size = 256 * 16; // Rough estimate of code size ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); + +- ResourceMark rm; +- BufferBlob* bb = BufferBlob::create("zgc_load_barrier_stubs", stub_code_size); +- CodeBuffer buf(bb); +- StubCodeGenerator cgen(&buf); ++ // Stub entry ++ __ bind(*stub->entry()); + +- Register rr = as_Register(0); +- for (int i = 0; i < RegisterImpl::number_of_registers; i++) { +- _load_barrier_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_STRONG_OOP_REF); +- _load_barrier_weak_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_WEAK_OOP_REF); +- rr = rr->successor(); ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ __ call(RuntimeAddress(stub->slow_path())); + } ++ ++ // Stub exit ++ __ jmp(*stub->continuation()); + } ++ ++#undef __ ++ ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 
b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e7..e433882a4 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -24,6 +24,14 @@ + #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP + #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP + ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 ++ ++class MacroAssembler; ++ + #ifdef COMPILER1 + class LIR_Assembler; + class LIR_OprDesc; +@@ -32,18 +40,13 @@ class StubAssembler; + class ZLoadBarrierStubC1; + #endif // COMPILER1 + +-class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +- address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; +- address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; ++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 + ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { + public: +- ZBarrierSetAssembler() : +- _load_barrier_slow_stub(), +- _load_barrier_weak_slow_stub() {} +- +- address load_barrier_slow_stub(Register reg) { return _load_barrier_slow_stub[reg->encoding()]; } +- address load_barrier_weak_slow_stub(Register reg) { return _load_barrier_weak_slow_stub[reg->encoding()]; } +- + virtual void load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, +@@ -86,7 +89,12 @@ public: + DecoratorSet decorators) const; + #endif // COMPILER1 + +- virtual void barrier_stubs_init(); ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 + }; + + #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP +diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad +new file mode 100644 +index 000000000..38c2e926b +--- /dev/null ++++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad +@@ -0,0 +1,168 @@ ++// ++// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++ ++source_hpp %{ ++ ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); ++ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::notZero, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); ++ __ jmp(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) ++%{ ++ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); ++ match(Set dst (LoadP mem)); ++ effect(KILL cr, TEMP dst); ++ ++ ins_cost(125); ++ ++ format %{ "movq $dst, $mem" %} ++ ++ ins_encode %{ ++ __ movptr($dst$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Weak Pointer ++instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) ++%{ ++ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); ++ match(Set dst (LoadP mem)); ++ effect(KILL cr, TEMP dst); ++ ++ ins_cost(125); ++ ++ format %{ "movq $dst, $mem" %} ++ ++ ins_encode %{ ++ __ movptr($dst$$Register, $mem$$Address); ++ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ ++ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP tmp); ++ ++ format %{ "lock\n\t" ++ "cmpxchgq $newval, $mem" %} ++ ++ ins_encode %{ ++ if (barrier_data() != ZLoadBarrierElided) { ++ __ movptr($tmp$$Register, $oldval$$Register); ++ } ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::zero, good); ++ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); ++ __ movptr($oldval$$Register, $tmp$$Register); ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_cmpxchg); ++%} ++ ++instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, KILL oldval, TEMP tmp); ++ ++ format %{ "lock\n\t" ++ "cmpxchgq $newval, $mem\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ ++ ins_encode %{ ++ if (barrier_data() != ZLoadBarrierElided) { ++ __ movptr($tmp$$Register, $oldval$$Register); ++ } ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != 
ZLoadBarrierElided) { ++ Label good; ++ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::zero, good); ++ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); ++ __ movptr($oldval$$Register, $tmp$$Register); ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ __ bind(good); ++ __ cmpptr($tmp$$Register, $oldval$$Register); + } -+ } -+}; - - #undef __ -+#define __ masm-> - --void ZBarrierSetAssembler::barrier_stubs_init() { -- // Load barrier stubs -- int stub_code_size = 256 * 16; // Rough estimate of code size -+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -+ BLOCK_COMMENT("ZLoadBarrierStubC2"); - -- ResourceMark rm; -- BufferBlob* bb = BufferBlob::create("zgc_load_barrier_stubs", stub_code_size); -- CodeBuffer buf(bb); -- StubCodeGenerator cgen(&buf); -+ // Stub entry -+ __ bind(*stub->entry()); - -- Register rr = as_Register(0); -- for (int i = 0; i < RegisterImpl::number_of_registers; i++) { -- _load_barrier_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_STRONG_OOP_REF); -- _load_barrier_weak_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_WEAK_OOP_REF); -- rr = rr->successor(); -+ { -+ ZSaveLiveRegisters save_live_registers(masm, stub); -+ ZSetupArguments setup_arguments(masm, stub); -+ __ call(RuntimeAddress(stub->slow_path())); - } ++ __ setb(Assembler::equal, $res$$Register); ++ __ movzbl($res$$Register, $res$$Register); ++ %} + -+ // Stub exit -+ __ jmp(*stub->continuation()); - } ++ ins_pipe(pipe_cmpxchg); ++%} + -+#undef __ ++instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndSetP mem newval)); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr); + -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -index 3687754e7..e433882a4 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -@@ -24,6 +24,14 @@ - #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP - #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP - -+#include "code/vmreg.hpp" -+#include "oops/accessDecorators.hpp" -+#ifdef COMPILER2 -+#include "opto/optoreg.hpp" -+#endif // COMPILER2 ++ format %{ "xchgq $newval, $mem" %} + -+class MacroAssembler; ++ ins_encode %{ ++ __ xchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_cmpxchg); ++%} + - #ifdef COMPILER1 - class LIR_Assembler; - class LIR_OprDesc; -@@ -32,18 +40,13 @@ class StubAssembler; - class ZLoadBarrierStubC1; - #endif // COMPILER1 - --class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -- address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; -- address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; -+#ifdef COMPILER2 -+class Node; -+class ZLoadBarrierStubC2; -+#endif // COMPILER2 - -+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { - public: -- ZBarrierSetAssembler() : -- _load_barrier_slow_stub(), -- _load_barrier_weak_slow_stub() {} -- -- address load_barrier_slow_stub(Register reg) { return _load_barrier_slow_stub[reg->encoding()]; } -- address load_barrier_weak_slow_stub(Register reg) { 
return _load_barrier_weak_slow_stub[reg->encoding()]; } -- - virtual void load_at(MacroAssembler* masm, - DecoratorSet decorators, - BasicType type, -@@ -86,7 +89,12 @@ public: - DecoratorSet decorators) const; - #endif // COMPILER1 - -- virtual void barrier_stubs_init(); -+#ifdef COMPILER2 -+ OptoReg::Name refine_register(const Node* node, -+ OptoReg::Name opto_reg); -+ void generate_c2_load_barrier_stub(MacroAssembler* masm, -+ ZLoadBarrierStubC2* stub) const; -+#endif // COMPILER2 - }; - - #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index 0fc26e1e4..927db59c8 100644 +index baa7cc774..2a3c91d2c 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1097,138 +1097,6 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 @@ -1699,7 +2080,7 @@ index 0fc26e1e4..927db59c8 100644 %} -@@ -1775,8 +1643,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo +@@ -1817,8 +1685,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo return (UseAVX > 2) ? 6 : 4; } @@ -1711,18 +2092,29 @@ index 0fc26e1e4..927db59c8 100644 // into scratch buffer is used to get size in 64-bit VM. LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index e256e223a..d127732a1 100644 +index 4607d1600..f8903c655 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -541,6 +541,7 @@ reg_class int_rdi_reg(RDI); - source_hpp %{ - - #include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" +@@ -539,18 +539,6 @@ reg_class int_rdi_reg(RDI); %} -@@ -1088,8 +1089,8 @@ static enum RC rc_class(OptoReg::Name reg) +-source_hpp %{ +- +-#include "gc/z/c2/zBarrierSetC2.hpp" +- +-%} +- +-source_hpp %{ +-#if INCLUDE_ZGC +-#include "gc/z/zBarrierSetAssembler.hpp" +-#endif +-%} +- + //----------SOURCE BLOCK------------------------------------------------------- + // This is a block of C++ code which provides values, functions, and + // definitions necessary in the rest of the architecture description +@@ -1170,8 +1158,8 @@ static enum RC rc_class(OptoReg::Name reg) static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st); @@ -1733,27 +2125,7 @@ index e256e223a..d127732a1 100644 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, int dst_offset, uint ireg, outputStream* st) { -@@ -1800,6 +1801,19 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); - } - -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::notZero, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ jmp(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ - %} - - //----------ENCODING BLOCK----------------------------------------------------- -@@ -4284,136 +4298,6 @@ operand 
cmpOpUCF2() %{ +@@ -4384,136 +4372,6 @@ operand cmpOpUCF2() %{ %} %} @@ -1890,7 +2262,7 @@ index e256e223a..d127732a1 100644 //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify // instruction definitions by not requiring the AD writer to specify separate -@@ -5306,6 +5190,7 @@ instruct loadRange(rRegI dst, memory mem) +@@ -5406,6 +5264,7 @@ instruct loadRange(rRegI dst, memory mem) instruct loadP(rRegP dst, memory mem) %{ match(Set dst (LoadP mem)); @@ -1898,7 +2270,7 @@ index e256e223a..d127732a1 100644 ins_cost(125); // XXX format %{ "movq $dst, $mem\t# ptr" %} -@@ -7515,6 +7400,7 @@ instruct storePConditional(memory heap_top_ptr, +@@ -7806,6 +7665,7 @@ instruct storePConditional(memory heap_top_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1906,7 +2278,7 @@ index e256e223a..d127732a1 100644 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " -@@ -7566,7 +7452,7 @@ instruct compareAndSwapP(rRegI res, +@@ -7857,7 +7717,7 @@ instruct compareAndSwapP(rRegI res, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1915,7 +2287,7 @@ index e256e223a..d127732a1 100644 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); -@@ -7808,7 +7694,7 @@ instruct compareAndExchangeP( +@@ -8099,7 +7959,7 @@ instruct compareAndExchangeP( rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1924,7 +2296,7 @@ index e256e223a..d127732a1 100644 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); effect(KILL cr); -@@ -7953,6 +7839,7 @@ instruct xchgL( memory mem, rRegL newval) %{ +@@ -8244,6 +8104,7 @@ instruct xchgL( memory mem, rRegL newval) %{ instruct xchgP( memory mem, rRegP newval) %{ match(Set newval (GetAndSetP mem newval)); @@ -1932,7 +2304,7 @@ index e256e223a..d127732a1 100644 format %{ "XCHGQ $newval,[$mem]" %} ins_encode %{ __ xchgq($newval$$Register, $mem$$Address); -@@ -11649,6 +11536,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) +@@ -11940,6 +11801,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{ match(Set cr (CmpP op1 (LoadP op2))); @@ -1940,7 +2312,7 @@ index e256e223a..d127732a1 100644 ins_cost(500); // XXX format %{ "cmpq $op1, $op2\t# ptr" %} -@@ -11674,7 +11562,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) +@@ -11965,7 +11827,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) // and raw pointers have no anti-dependencies. instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{ @@ -1950,7 +2322,7 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP op1 (LoadP op2))); format %{ "cmpq $op1, $op2\t# raw ptr" %} -@@ -11699,7 +11588,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) +@@ -11990,7 +11853,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) // any compare to a zero should be eq/neq. 
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{ @@ -1960,7 +2332,7 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP (LoadP op) zero)); ins_cost(500); // XXX -@@ -11712,7 +11602,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) +@@ -12003,7 +11867,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{ @@ -1971,28 +2343,24 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP (LoadP mem) zero)); format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %} -@@ -12667,274 +12559,126 @@ instruct RethrowException() - // Execute ZGC load barrier (strong) slow path - // +@@ -12954,279 +12820,6 @@ instruct RethrowException() + ins_pipe(pipe_jmp); + %} +-// +-// Execute ZGC load barrier (strong) slow path +-// +- -// When running without XMM regs -instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ -+// Load Pointer -+instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate(MaxVectorSize < 16 && !n->as_LoadBarrierSlowReg()->is_weak()); -+ ins_cost(125); - +- - effect(DEF dst, KILL cr); -+ format %{ "movq $dst, $mem" %} - +- - format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2006,42 +2374,28 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ __ movptr($dst$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For XMM and YMM enabled processors -instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ -+ ins_pipe(ialu_reg_mem); -+%} - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16) && !n->as_LoadBarrierSlowReg()->is_weak()); -+// Load Weak Pointer -+instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); - +- - effect(DEF dst, KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15); -+ ins_cost(125); - +- - format %{"LoadBarrierSlowRegXmm $dst, $mem" %} -+ format %{ "movq $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2055,12 +2409,10 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ __ movptr($dst$$Register, $mem$$Address); -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For ZMM enabled processors -instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2102,29 +2454,22 @@ 
index e256e223a..d127732a1 100644 -#endif - %} - ins_pipe(pipe_slow); -+ ins_pipe(ialu_reg_mem); - %} - +-%} +- -// -// Execute ZGC load barrier (weak) slow path -// - -// When running without XMM regs -instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ -+instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP tmp); - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate(MaxVectorSize < 16 && n->as_LoadBarrierSlowReg()->is_weak()); -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem" %} - +- - effect(DEF dst, KILL cr); - - format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2138,25 +2483,10 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ } - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For XMM and YMM enabled processors -instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2190,9 +2520,8 @@ index e256e223a..d127732a1 100644 -#endif - %} - ins_pipe(pipe_slow); -+ ins_pipe(pipe_cmpxchg); - %} - +-%} +- -// For ZMM enabled processors -instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2222,46 +2551,17 @@ index e256e223a..d127732a1 100644 -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); - +- - assert(d != r12, "Can't be R12!"); - assert(d != r15, "Can't be R15!"); - assert(d != rsp, "Can't be RSP!"); -+instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, KILL oldval, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem\n\t" -+ "sete $res\n\t" -+ "movzbl $res, $res" %} - +- - __ lea(d,$mem$$Address); - __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); -#else - ShouldNotReachHere(); -#endif -+ ins_encode %{ -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, 
$oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ __ cmpptr($tmp$$Register, $oldval$$Register); -+ } -+ __ setb(Assembler::equal, $res$$Register); -+ __ movzbl($res$$Register, $res$$Register); - %} +- %} - ins_pipe(pipe_slow); -%} - @@ -2275,7 +2575,7 @@ index e256e223a..d127732a1 100644 - predicate(VM_Version::supports_cx8()); - match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr); - +- - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} - opcode(0x0F, 0xB1); @@ -2285,9 +2585,8 @@ index e256e223a..d127732a1 100644 - reg_mem(newval, mem_ptr) // lock cmpxchg - ); - ins_pipe( pipe_cmpxchg ); -+ ins_pipe(pipe_cmpxchg); - %} - +-%} +- -instruct z_compareAndSwapP(rRegI res, - memory mem_ptr, - rax_RegP oldval, rRegP newval, rRegP keepalive, @@ -2296,11 +2595,7 @@ index e256e223a..d127732a1 100644 - match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr, KILL oldval); -+instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -+ match(Set newval (GetAndSetP mem newval)); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr); - +- - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" - "sete $res\n\t" @@ -2315,28 +2610,34 @@ index e256e223a..d127732a1 100644 - Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); - ins_pipe( pipe_cmpxchg ); -%} -+ format %{ "xchgq $newval, $mem" %} - +- -instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{ - match(Set newval (ZGetAndSetP mem (Binary newval keepalive))); - format %{ "XCHGQ $newval,[$mem]" %} - ins_encode %{ +- ins_encode %{ - __ xchgq($newval$$Register, $mem$$Address); -+ __ xchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe( pipe_cmpxchg ); -%} -+ -+ ins_pipe(pipe_cmpxchg); -+%} - +- // ============================================================================ // This name is KNOWN by the ADLC and cannot be changed. 
+ // The ADLC forces a 'TypeRawPtr::BOTTOM' output type +diff --git a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp +index 47894b5c8..f956b53d6 100644 +--- a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp ++++ b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp +@@ -51,7 +51,7 @@ + + // Support for building on older Linux systems + #ifndef __NR_memfd_create +-#define __NR_memfd_create 319 ++#define __NR_memfd_create 279 + #endif + #ifndef MFD_CLOEXEC + #define MFD_CLOEXEC 0x0001U diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index ef704f472..5ba1fdc57 100644 +index bc1ed2218..c7b855a7e 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -774,11 +774,6 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const { @@ -2351,7 +2652,7 @@ index ef704f472..5ba1fdc57 100644 #if INCLUDE_SHENANDOAHGC !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") || !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") || -@@ -3513,9 +3508,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { +@@ -3529,9 +3524,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "StoreCM", "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP", "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN", @@ -2361,6 +2662,110 @@ index ef704f472..5ba1fdc57 100644 "ClearArray" }; int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*); +diff --git a/src/hotspot/share/c1/c1_Instruction.cpp b/src/hotspot/share/c1/c1_Instruction.cpp +index c4135f695..47fad18c6 100644 +--- a/src/hotspot/share/c1/c1_Instruction.cpp ++++ b/src/hotspot/share/c1/c1_Instruction.cpp +@@ -29,6 +29,7 @@ + #include "c1/c1_ValueStack.hpp" + #include "ci/ciObjArrayKlass.hpp" + #include "ci/ciTypeArrayKlass.hpp" ++#include "utilities/bitMap.inline.hpp" + + + // Implementation of Instruction +diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp +index 5c65ffff3..081785c41 100644 +--- a/src/hotspot/share/ci/ciInstanceKlass.cpp ++++ b/src/hotspot/share/ci/ciInstanceKlass.cpp +@@ -36,6 +36,7 @@ + #include "runtime/fieldDescriptor.inline.hpp" + #include "runtime/handles.inline.hpp" + #include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" + + // ciInstanceKlass + // +@@ -42,6 +42,44 @@ + // This class represents a Klass* in the HotSpot virtual machine + // whose Klass part in an InstanceKlass. 
+ ++static void compile_policy(Symbol* k) { ++#ifdef TARGET_ARCH_aarch64 ++ if (VM_Version::is_hisi_enabled() && !SharedRuntime::_opt_for_aarch64) { ++ unsigned char name[19]; ++ strncpy((char*)name, k->as_C_string(), 18); ++ name[18] = '\0'; ++ ++ unsigned h[4]; ++ ++ h[0] = *(unsigned*)(&name[0]); ++ h[1] = *(unsigned*)(&name[4]); ++ h[2] = *(unsigned*)(&name[8]); ++ h[3] = *(unsigned*)(&name[12]); ++ ++ unsigned t = 0x35b109d1; ++ unsigned v; ++ bool opt = true; ++ ++ unsigned res[4] = {0x922509d3, 0xd9b4865d, 0xa9496f1, 0xdda241ef}; ++ ++ for (int i = 0; i < 4; i++) { ++ t ^= (t << 11); ++ v = h[i]; ++ v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)); ++ t = v; ++ if (v != res[i]) { ++ opt = false; ++ ++ break; ++ } ++ } ++ ++ if (opt) { ++ SharedRuntime::_opt_for_aarch64 = true; ++ } ++ } ++#endif ++} + + // ------------------------------------------------------------------ + // ciInstanceKlass::ciInstanceKlass +@@ -52,6 +90,9 @@ ciInstanceKlass::ciInstanceKlass(Klass* k) : + { + assert(get_Klass()->is_instance_klass(), "wrong type"); + assert(get_instanceKlass()->is_loaded(), "must be at least loaded"); ++ ++ compile_policy(k->name()); ++ + InstanceKlass* ik = get_instanceKlass(); + + AccessFlags access_flags = ik->access_flags(); +@@ -117,6 +158,9 @@ ciInstanceKlass::ciInstanceKlass(ciSymbol* name, + : ciKlass(name, T_OBJECT) + { + assert(name->byte_at(0) != '[', "not an instance klass"); ++ ++ compile_policy(name->get_symbol()); ++ + _init_state = (InstanceKlass::ClassState)0; + _nonstatic_field_size = -1; + _has_nonstatic_fields = false; +diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp +index cef3f530c..74a2374f0 100644 +--- a/src/hotspot/share/classfile/vmSymbols.hpp ++++ b/src/hotspot/share/classfile/vmSymbols.hpp +@@ -1003,6 +1003,10 @@ + do_name( montgomerySquare_name, "implMontgomerySquare") \ + do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \ + \ ++ do_class(java_util_Random, "java/util/Random") \ ++ do_intrinsic(_nextInt, java_util_Random, next_int_name, void_int_signature, F_R) \ ++ do_name(next_int_name,"nextInt") \ ++ \ + do_class(jdk_internal_util_ArraysSupport, "jdk/internal/util/ArraysSupport") \ + do_intrinsic(_vectorizedMismatch, jdk_internal_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\ + do_name(vectorizedMismatch_name, "vectorizedMismatch") \ diff --git a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp index 8eba28f94..b20cd73d9 100644 --- a/src/hotspot/share/compiler/compilerDirectives.hpp @@ -2375,11 +2780,242 @@ index 8eba28f94..b20cd73d9 100644 #else #define compilerdirectives_c2_flags(cflags) #endif +diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +index 545275644..48fe04b08 100644 +--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp ++++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +@@ -115,10 +115,13 @@ Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) con + + Node* load; + if (in_native) { +- load = kit->make_load(control, adr, val_type, access.type(), mo); ++ load = kit->make_load(control, adr, val_type, access.type(), mo, dep, ++ requires_atomic_access, unaligned, ++ mismatched, unsafe, access.barrier_data()); + } else { + load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, +- dep, requires_atomic_access, unaligned, mismatched, unsafe); ++ dep, requires_atomic_access, unaligned, mismatched, unsafe, 
++ access.barrier_data()); + } + + access.set_raw_access(load); +@@ -348,28 +351,28 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); +- load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); ++ load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo); + } else + #endif + { +- load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); ++ load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo); + } + } else { + switch (access.type()) { + case T_BYTE: { +- load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_SHORT: { +- load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_INT: { +- load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_LONG: { +- load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + default: +@@ -377,6 +380,9 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -405,50 +411,50 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); ++ load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); ++ load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); + } + } else + #endif + { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); + 
} else { +- load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + } + } else { + switch(access.type()) { + case T_BYTE: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_SHORT: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_INT: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_LONG: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } +@@ -457,6 +463,9 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -478,27 +487,30 @@ Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_va + } else + #endif + { +- load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr())); ++ load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()); + } + } else { + switch (access.type()) { + case T_BYTE: +- load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_SHORT: +- load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_INT: +- load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, 
adr_type); + break; + case T_LONG: +- load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type); + break; + default: + ShouldNotReachHere(); + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -520,21 +532,24 @@ Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val + + switch(access.type()) { + case T_BYTE: +- load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_SHORT: +- load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_INT: +- load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_LONG: +- load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type); + break; + default: + ShouldNotReachHere(); + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -index eea74674f..487988bd8 100644 +index eea74674f..8b4be7d11 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -@@ -198,7 +198,7 @@ public: +@@ -96,6 +96,7 @@ protected: + Node* _base; + C2AccessValuePtr& _addr; + Node* _raw_access; ++ uint8_t _barrier_data; + + void fixup_decorators(); + void* barrier_set_state() const; +@@ -108,7 +109,8 @@ public: + _type(type), + _base(base), + _addr(addr), +- _raw_access(NULL) ++ _raw_access(NULL), ++ _barrier_data(0) + { + fixup_decorators(); + } +@@ -122,6 +124,9 @@ public: + bool is_raw() const { return (_decorators & AS_RAW) != 0; } + Node* raw_access() const { return _raw_access; } + ++ uint8_t barrier_data() const { return _barrier_data; } ++ void set_barrier_data(uint8_t data) { _barrier_data = data; } ++ + void set_raw_access(Node* raw_access) { _raw_access = raw_access; } + virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses. + +@@ -198,7 +203,7 @@ public: virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const; // Support for GC barriers emitted during parsing @@ -2388,7 +3024,7 @@ index eea74674f..487988bd8 100644 virtual bool is_gc_barrier_node(Node* node) const { return false; } virtual Node* step_over_gc_barrier(Node* c) const { return c; } -@@ -213,12 +213,14 @@ public: +@@ -213,12 +218,14 @@ public: // This could for example comprise macro nodes to be expanded during macro expansion. 
virtual void* create_barrier_state(Arena* comp_arena) const { return NULL; } virtual void optimize_loops(PhaseIdealLoop* phase, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { } @@ -2406,10 +3042,10 @@ index eea74674f..487988bd8 100644 #endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -index bf0bd43af..a12973464 100644 +index bf0bd43af..e178761a0 100644 --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -@@ -22,443 +22,156 @@ +@@ -22,1515 +22,398 @@ */ #include "precompiled.hpp" @@ -2464,62 +3100,56 @@ index bf0bd43af..a12973464 100644 -LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const { - return _load_barrier_nodes->at(idx); -} -- + -void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const { - return new(comp_arena) ZBarrierSetC2State(comp_arena); -} - --ZBarrierSetC2State* ZBarrierSetC2::state() const { -- return reinterpret_cast(Compile::current()->barrier_set_state()); --} +class ZBarrierSetC2State : public ResourceObj { +private: + GrowableArray* _stubs; + Node_Array _live; --bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const { -- // 1. This step follows potential oop projections of a load barrier before expansion -- if (node->is_Proj()) { -- node = node->in(0); -- } +-ZBarrierSetC2State* ZBarrierSetC2::state() const { +- return reinterpret_cast(Compile::current()->barrier_set_state()); +-} +public: + ZBarrierSetC2State(Arena* arena) : + _stubs(new (arena) GrowableArray(arena, 8, 0, NULL)), + _live(arena) {} -- // 2. This step checks for unexpanded load barriers -- if (node->is_LoadBarrier()) { -- return true; +-bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const { +- // 1. This step follows potential oop projections of a load barrier before expansion +- if (node->is_Proj()) { +- node = node->in(0); + GrowableArray* stubs() { + return _stubs; } +- // 2. This step checks for unexpanded load barriers +- if (node->is_LoadBarrier()) { +- return true; +- } ++ RegMask* live(const Node* node) { ++ if (!node->is_Mach()) { ++ // Don't need liveness for non-MachNodes ++ return NULL; ++ } + - // 3. 
This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); - Node* n = phi->in(1); - if (n != NULL && (n->is_LoadBarrierSlowReg())) { - return true; -+ RegMask* live(const Node* node) { -+ if (!node->is_Mach()) { -+ // Don't need liveness for non-MachNodes -+ return NULL; - } -- } - -- return false; --} + const MachNode* const mach = node->as_Mach(); + if (mach->barrier_data() != ZLoadBarrierStrong && + mach->barrier_data() != ZLoadBarrierWeak) { + // Don't need liveness data for nodes without barriers + return NULL; -+ } - --void ZBarrierSetC2::register_potential_barrier_node(Node* node) const { -- if (node->is_LoadBarrier()) { -- state()->add_load_barrier_node(node->as_LoadBarrier()); + } - } + +- return false; -} + RegMask* live = (RegMask*)_live[node->_idx]; + if (live == NULL) { @@ -2527,14 +3157,22 @@ index bf0bd43af..a12973464 100644 + _live.map(node->_idx, (Node*)live); + } --void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const { +-void ZBarrierSetC2::register_potential_barrier_node(Node* node) const { - if (node->is_LoadBarrier()) { -- state()->remove_load_barrier_node(node->as_LoadBarrier()); +- state()->add_load_barrier_node(node->as_LoadBarrier()); + return live; } -} +}; +-void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const { +- if (node->is_LoadBarrier()) { +- state()->remove_load_barrier_node(node->as_LoadBarrier()); +- } ++static ZBarrierSetC2State* barrier_set_state() { ++ return reinterpret_cast(Compile::current()->barrier_set_state()); + } + -void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful) const { - // Remove useless LoadBarrier nodes - ZBarrierSetC2State* s = state(); @@ -2543,33 +3181,41 @@ index bf0bd43af..a12973464 100644 - if (!useful.member(n)) { - unregister_potential_barrier_node(n); - } -- } -+static ZBarrierSetC2State* barrier_set_state() { -+ return reinterpret_cast(Compile::current()->barrier_set_state()); - } - --void ZBarrierSetC2::enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const { -- if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) { -- worklist.push(node); +ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { + ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, weak); + if (!Compile::current()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); } -} -- + +-void ZBarrierSetC2::enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const { +- if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) { +- worklist.push(node); +- } ++ return stub; + } + -static bool load_require_barrier(LoadNode* load) { return ((load->barrier_data() & RequireBarrier) != 0); } -static bool load_has_weak_barrier(LoadNode* load) { return ((load->barrier_data() & WeakBarrier) != 0); } -static bool load_has_expanded_barrier(LoadNode* load) { return ((load->barrier_data() & ExpandedBarrier) != 0); } -static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); } - +- -static void load_set_barrier(LoadNode* load, bool weak) { - if (weak) { - load->set_barrier_data(WeakBarrier); - } else { - load->set_barrier_data(RequireBarrier); - } -+ return stub; ++ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) 
: ++ _node(node), ++ _ref_addr(ref_addr), ++ _ref(ref), ++ _tmp(tmp), ++ _weak(weak), ++ _entry(), ++ _continuation() { ++ assert_different_registers(ref, ref_addr.base()); ++ assert_different_registers(ref, ref_addr.index()); } -// == LoadBarrierNode == @@ -2585,35 +3231,27 @@ index bf0bd43af..a12973464 100644 - init_req(Control, c); - init_req(Memory, mem); - init_req(Oop, val); -- init_req(Address, adr); -- init_req(Similar, C->top()); -- -- init_class_id(Class_LoadBarrier); -- BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); -- bs->register_potential_barrier_node(this); -+ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) : -+ _node(node), -+ _ref_addr(ref_addr), -+ _ref(ref), -+ _tmp(tmp), -+ _weak(weak), -+ _entry(), -+ _continuation() { -+ assert_different_registers(ref, ref_addr.base()); -+ assert_different_registers(ref, ref_addr.index()); +- init_req(Address, adr); +- init_req(Similar, C->top()); +- +- init_class_id(Class_LoadBarrier); +- BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); +- bs->register_potential_barrier_node(this); ++Address ZLoadBarrierStubC2::ref_addr() const { ++ return _ref_addr; } -uint LoadBarrierNode::size_of() const { - return sizeof(*this); -+Address ZLoadBarrierStubC2::ref_addr() const { -+ return _ref_addr; ++Register ZLoadBarrierStubC2::ref() const { ++ return _ref; } -uint LoadBarrierNode::cmp(const Node& n) const { - ShouldNotReachHere(); - return 0; -+Register ZLoadBarrierStubC2::ref() const { -+ return _ref; ++Register ZLoadBarrierStubC2::tmp() const { ++ return _tmp; } -const Type *LoadBarrierNode::bottom_type() const { @@ -2623,15 +3261,15 @@ index bf0bd43af..a12973464 100644 - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type(); - return TypeTuple::make(Number_of_Outputs, floadbarrier); -+Register ZLoadBarrierStubC2::tmp() const { -+ return _tmp; ++address ZLoadBarrierStubC2::slow_path() const { ++ const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF; ++ return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators); } -const TypePtr* LoadBarrierNode::adr_type() const { - return TypeRawPtr::BOTTOM; -+address ZLoadBarrierStubC2::slow_path() const { -+ const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF; -+ return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators); ++RegMask& ZLoadBarrierStubC2::live() const { ++ return *barrier_set_state()->live(_node); } -const Type *LoadBarrierNode::Value(PhaseGVN *phase) const { @@ -2641,8 +3279,12 @@ index bf0bd43af..a12973464 100644 - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = val_t; - return TypeTuple::make(Number_of_Outputs, floadbarrier); -+RegMask& ZLoadBarrierStubC2::live() const { -+ return *barrier_set_state()->live(_node); ++Label* ZLoadBarrierStubC2::entry() { ++ // The _entry will never be bound when in_scratch_emit_size() is true. ++ // However, we still need to return a label that is not bound now, but ++ // will eventually be bound. Any lable will do, as it will only act as ++ // a placeholder, so we return the _continuation label. ++ return Compile::current()->in_scratch_emit_size() ? 
&_continuation : &_entry; } -bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) { @@ -2658,12 +3300,8 @@ index bf0bd43af..a12973464 100644 - } - - return false; -+Label* ZLoadBarrierStubC2::entry() { -+ // The _entry will never be bound when in_scratch_emit_size() is true. -+ // However, we still need to return a label that is not bound now, but -+ // will eventually be bound. Any lable will do, as it will only act as -+ // a placeholder, so we return the _continuation label. -+ return Compile::current()->in_scratch_emit_size() ? &_continuation : &_entry; ++Label* ZLoadBarrierStubC2::continuation() { ++ return &_continuation; } -LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) { @@ -2758,10 +3396,8 @@ index bf0bd43af..a12973464 100644 - } - - return NULL; -+Label* ZLoadBarrierStubC2::continuation() { -+ return &_continuation; - } - +-} +- -void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const { - // Change to that barrier may affect a dominated barrier so re-push those - assert(!is_weak(), "sanity"); @@ -2932,8 +3568,9 @@ index bf0bd43af..a12973464 100644 - --imax; - } - } --} -- ++ return size; + } + -bool LoadBarrierNode::has_true_uses() const { - Node* out_res = proj_out_or_null(Oop); - if (out_res != NULL) { @@ -2942,71 +3579,70 @@ index bf0bd43af..a12973464 100644 - if (!u->is_LoadBarrier() || u->in(Similar) != out_res) { - return true; - } -- } -- } ++static void set_barrier_data(C2Access& access) { ++ if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) { ++ if (access.decorators() & ON_WEAK_OOP_REF) { ++ access.set_barrier_data(ZLoadBarrierWeak); ++ } else { ++ access.set_barrier_data(ZLoadBarrierStrong); + } + } - return false; -+ return size; - } - - static bool barrier_needed(C2Access access) { -@@ -466,1071 +179,253 @@ static bool barrier_needed(C2Access access) { +-} +- +-static bool barrier_needed(C2Access access) { +- return ZBarrierSet::barrier_needed(access.decorators(), access.type()); } Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { - Node* p = BarrierSetC2::load_at_resolved(access, val_type); - if (!barrier_needed(access)) { - return p; -+ Node* result = BarrierSetC2::load_at_resolved(access, val_type); -+ if (barrier_needed(access) && access.raw_access()->is_Mem()) { -+ if ((access.decorators() & ON_WEAK_OOP_REF) != 0) { -+ access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak); -+ } else { -+ access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong); -+ } - } - +- } +- - bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0; - if (p->isa_Load()) { - load_set_barrier(p->as_Load(), weak); - } - return p; -+ return result; ++ set_barrier_data(access); ++ return BarrierSetC2::load_at_resolved(access, val_type); } Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* val_type) const { - Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); +- Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; ++ set_barrier_data(access); ++ return 
BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); } Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { - Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); +- Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; - +- ++ set_barrier_data(access); ++ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); } Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const { - Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); +- Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; ++ set_barrier_data(access); ++ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); } -// == Macro Expansion == @@ -3063,8 +3699,10 @@ index bf0bd43af..a12973464 100644 - assert(step_over_gc_barrier(result_phi) == in_val, "sanity"); - phase->C->print_method(PHASE_BEFORE_MACRO_EXPANSION, 4, barrier->_idx); - return; --} -- ++bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { ++ return type == T_OBJECT || type == T_ARRAY; + } + -bool ZBarrierSetC2::expand_macro_nodes(PhaseMacroExpand* macro) const { - Compile* C = Compile::current(); - PhaseIterGVN &igvn = macro->igvn(); @@ -3105,10 +3743,8 @@ index bf0bd43af..a12973464 100644 - if (C->failing()) return true; - } - return false; -+bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { -+ return type == T_OBJECT || type == T_ARRAY; - } - +-} +- -Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const { - Node* node = c; - @@ -3116,13 +3752,13 @@ index bf0bd43af..a12973464 100644 - if (node->is_Proj()) { - node = node->in(0); - } -- ++// == Dominating barrier elision == + - // 2. This step checks for unexpanded load barriers - if (node->is_LoadBarrier()) { - return node->in(LoadBarrierNode::Oop); - } -+// == Dominating barrier elision == - +- - // 3. This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); @@ -3419,6 +4055,16 @@ index bf0bd43af..a12973464 100644 - // In that way no extra unnecessary loads are cloned. 
- - // Any use dominated by original block must have an phi and a region added +- +- Node* catch_node = ctrl->raw_out(0); +- int number_of_catch_projs = catch_node->outcnt(); +- Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); +- Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); +- +- // The phi_map is used to keep track of where phis have already been inserted +- int phi_map_len = phase->C->unique(); +- Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); +- Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); + // Step 2 - Find dominating accesses for each load + for (uint i = 0; i < barrier_loads.size(); i++) { + MachNode* const load = barrier_loads.at(i)->as_Mach(); @@ -3428,10 +4074,9 @@ index bf0bd43af..a12973464 100644 + Block* const load_block = cfg->get_block_for_node(load); + const uint load_index = block_index(load_block, load); -- Node* catch_node = ctrl->raw_out(0); -- int number_of_catch_projs = catch_node->outcnt(); -- Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); -- Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); +- for (unsigned int i = 0; i < load->outcnt(); i++) { +- Node* load_use_control = NULL; +- Node* load_use = load->raw_out(i); + for (uint j = 0; j < mem_ops.size(); j++) { + MachNode* mem = mem_ops.at(j)->as_Mach(); + const TypePtr* mem_adr_type = NULL; @@ -3440,15 +4085,6 @@ index bf0bd43af..a12973464 100644 + Block* mem_block = cfg->get_block_for_node(mem); + uint mem_index = block_index(mem_block, mem); -- // The phi_map is used to keep track of where phis have already been inserted -- int phi_map_len = phase->C->unique(); -- Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); -- Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); -- -- for (unsigned int i = 0; i < load->outcnt(); i++) { -- Node* load_use_control = NULL; -- Node* load_use = load->raw_out(i); -- - if (phase->has_ctrl(load_use)) { - load_use_control = phase->get_ctrl(load_use); - } else { @@ -3520,13 +4156,13 @@ index bf0bd43af..a12973464 100644 - // But keep iterating to catch any bad idom early. - found_dominating_catchproj = true; - } - +- - // We found no single catchproj that dominated the use - The use is at a point after - // where control flow from multiple catch projs have merged. We will have to create - // phi nodes before the use and tie the output from the cloned loads together. 
It - // can be a single phi or a number of chained phis, depending on control flow - if (!found_dominating_catchproj) { -- + - // Use phi-control if use is a phi - if (load_use_is_phi) { - load_use_control = phi_ctrl; @@ -4424,6 +5060,366 @@ index 52133c073..9d07f9e8c 100644 #include "utilities/macros.hpp" class ZBarrierSetAssemblerBase : public BarrierSetAssembler { +diff --git a/src/hotspot/share/gc/z/zGlobals.hpp b/src/hotspot/share/gc/z/zGlobals.hpp +index 080ea5c0e..0f9e9dcb4 100644 +--- a/src/hotspot/share/gc/z/zGlobals.hpp ++++ b/src/hotspot/share/gc/z/zGlobals.hpp +@@ -117,11 +117,8 @@ extern uintptr_t ZAddressWeakBadMask; + // Marked state + extern uintptr_t ZAddressMetadataMarked; + +-// Address space for mark stack allocations +-const size_t ZMarkStackSpaceSizeShift = 40; // 1TB +-const size_t ZMarkStackSpaceSize = (size_t)1 << ZMarkStackSpaceSizeShift; +-const uintptr_t ZMarkStackSpaceStart = ZAddressSpaceEnd + ZMarkStackSpaceSize; +-const uintptr_t ZMarkStackSpaceEnd = ZMarkStackSpaceStart + ZMarkStackSpaceSize; ++// Mark stack space ++extern uintptr_t ZMarkStackSpaceStart; + const size_t ZMarkStackSpaceExpandSize = (size_t)1 << 25; // 32M + + // Mark stack and magazine sizes +diff --git a/src/hotspot/share/gc/z/zHeap.cpp b/src/hotspot/share/gc/z/zHeap.cpp +index ff08a0759..7f0f0b0de 100644 +--- a/src/hotspot/share/gc/z/zHeap.cpp ++++ b/src/hotspot/share/gc/z/zHeap.cpp +@@ -49,6 +49,7 @@ + #include "runtime/thread.hpp" + #include "utilities/align.hpp" + #include "utilities/debug.hpp" ++#include "prims/resolvedMethodTable.hpp" + + static const ZStatSampler ZSamplerHeapUsedBeforeMark("Memory", "Heap Used Before Mark", ZStatUnitBytes); + static const ZStatSampler ZSamplerHeapUsedAfterMark("Memory", "Heap Used After Mark", ZStatUnitBytes); +@@ -334,6 +335,10 @@ bool ZHeap::mark_end() { + Universe::verify(); + } + ++ // Free unsed entries of ResolvedMethodTable and weakhandles ++ // avoid ResolvedMethodTable inflation and native memory leak ++ ResolvedMethodTable::unlink(); ++ + return true; + } + +diff --git a/src/hotspot/share/gc/z/zLiveMap.cpp b/src/hotspot/share/gc/z/zLiveMap.cpp +index 7187b6166..c1d79b794 100644 +--- a/src/hotspot/share/gc/z/zLiveMap.cpp ++++ b/src/hotspot/share/gc/z/zLiveMap.cpp +@@ -50,7 +50,9 @@ void ZLiveMap::reset(size_t index) { + + // Multiple threads can enter here, make sure only one of them + // resets the marking information while the others busy wait. +- for (uint32_t seqnum = _seqnum; seqnum != ZGlobalSeqNum; seqnum = _seqnum) { ++ for (uint32_t seqnum = OrderAccess::load_acquire(&_seqnum); ++ seqnum != ZGlobalSeqNum; ++ seqnum = OrderAccess::load_acquire(&_seqnum)) { + if ((seqnum != seqnum_initializing) && + (Atomic::cmpxchg(seqnum_initializing, &_seqnum, seqnum) == seqnum)) { + // Reset marking information +@@ -61,13 +63,13 @@ void ZLiveMap::reset(size_t index) { + segment_live_bits().clear(); + segment_claim_bits().clear(); + +- // Make sure the newly reset marking information is +- // globally visible before updating the page seqnum. +- OrderAccess::storestore(); +- +- // Update seqnum + assert(_seqnum == seqnum_initializing, "Invalid"); +- _seqnum = ZGlobalSeqNum; ++ ++ // Make sure the newly reset marking information is ordered ++ // before the update of the page seqnum, such that when the ++ // up-to-date seqnum is load acquired, the bit maps will not ++ // contain stale information. 
++ OrderAccess::release_store(&_seqnum, ZGlobalSeqNum); + break; + } + +@@ -89,10 +91,6 @@ void ZLiveMap::reset_segment(BitMap::idx_t segment) { + if (!claim_segment(segment)) { + // Already claimed, wait for live bit to be set + while (!is_segment_live(segment)) { +- // Busy wait. The loadload barrier is needed to make +- // sure we re-read the live bit every time we loop. +- OrderAccess::loadload(); +- + // Mark reset contention + if (!contention) { + // Count contention once +diff --git a/src/hotspot/share/gc/z/zLiveMap.inline.hpp b/src/hotspot/share/gc/z/zLiveMap.inline.hpp +index 1e4d56f41..fb45a892c 100644 +--- a/src/hotspot/share/gc/z/zLiveMap.inline.hpp ++++ b/src/hotspot/share/gc/z/zLiveMap.inline.hpp +@@ -30,6 +30,7 @@ + #include "gc/z/zOop.inline.hpp" + #include "gc/z/zUtils.inline.hpp" + #include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" + #include "utilities/bitMap.inline.hpp" + #include "utilities/debug.hpp" + +@@ -38,7 +39,7 @@ inline void ZLiveMap::reset() { + } + + inline bool ZLiveMap::is_marked() const { +- return _seqnum == ZGlobalSeqNum; ++ return OrderAccess::load_acquire(&_seqnum) == ZGlobalSeqNum; + } + + inline uint32_t ZLiveMap::live_objects() const { +@@ -68,15 +69,15 @@ inline BitMapView ZLiveMap::segment_claim_bits() { + } + + inline bool ZLiveMap::is_segment_live(BitMap::idx_t segment) const { +- return segment_live_bits().at(segment); ++ return segment_live_bits().par_at(segment); + } + + inline bool ZLiveMap::set_segment_live_atomic(BitMap::idx_t segment) { +- return segment_live_bits().par_set_bit(segment); ++ return segment_live_bits().par_set_bit(segment, memory_order_release); + } + + inline bool ZLiveMap::claim_segment(BitMap::idx_t segment) { +- return segment_claim_bits().par_set_bit(segment); ++ return segment_claim_bits().par_set_bit(segment, memory_order_acq_rel); + } + + inline BitMap::idx_t ZLiveMap::first_live_segment() const { +diff --git a/src/hotspot/share/gc/z/zMarkStack.cpp b/src/hotspot/share/gc/z/zMarkStack.cpp +index 52fe51ece..9cc768956 100644 +--- a/src/hotspot/share/gc/z/zMarkStack.cpp ++++ b/src/hotspot/share/gc/z/zMarkStack.cpp +@@ -28,58 +28,44 @@ + #include "gc/z/zMarkStack.inline.hpp" + #include "logging/log.hpp" + #include "runtime/atomic.hpp" ++#include "runtime/os.hpp" + #include "utilities/debug.hpp" + +-#include +-#include ++uintptr_t ZMarkStackSpaceStart; + + ZMarkStackSpace::ZMarkStackSpace() : + _expand_lock(), ++ _start(0), + _top(0), + _end(0) { +- assert(ZMarkStacksMax >= ZMarkStackSpaceExpandSize, "ZMarkStacksMax too small"); +- assert(ZMarkStacksMax <= ZMarkStackSpaceSize, "ZMarkStacksMax too large"); +- ++ assert(ZMarkStackSpaceLimit >= ZMarkStackSpaceExpandSize, "ZMarkStackSpaceLimit too small"); + // Reserve address space +- const void* res = mmap((void*)ZMarkStackSpaceStart, ZMarkStackSpaceSize, +- PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0); +- if (res != (void*)ZMarkStackSpaceStart) { +- log_error(gc, marking)("Failed to reserve address space for marking stacks"); ++ const size_t size = ZMarkStackSpaceLimit; ++ const size_t alignment = (size_t)os::vm_allocation_granularity(); ++ const uintptr_t addr = (uintptr_t)os::reserve_memory(size, NULL, alignment, mtGC); ++ if (addr == 0) { ++ log_error(gc, marking)("Failed to reserve address space for mark stacks"); + return; + } + + // Successfully initialized +- _top = _end = ZMarkStackSpaceStart; +-} ++ _start = _top = _end = addr; + +-bool ZMarkStackSpace::is_initialized() const { +- return _top != 0; ++ // Register mark stack space 
start ++ ZMarkStackSpaceStart = _start; + } + +-bool ZMarkStackSpace::expand() { +- const size_t max = ZMarkStackSpaceStart + ZMarkStacksMax; +- if (_end + ZMarkStackSpaceExpandSize > max) { +- // Expansion limit reached +- return false; +- } +- +- void* const res = mmap((void*)_end, ZMarkStackSpaceExpandSize, +- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); +- if (res == MAP_FAILED) { +- ZErrno err; +- log_error(gc, marking)("Failed to map memory for marking stacks (%s)", err.to_string()); +- return false; +- } +- +- return true; ++bool ZMarkStackSpace::is_initialized() const { ++ return _start != 0; + } + + uintptr_t ZMarkStackSpace::alloc_space(size_t size) { +- uintptr_t top = _top; ++ uintptr_t top = Atomic::load(&_top); + + for (;;) { ++ const uintptr_t end = Atomic::load(&_end); + const uintptr_t new_top = top + size; +- if (new_top > _end) { ++ if (new_top > end) { + // Not enough space left + return 0; + } +@@ -104,24 +90,28 @@ uintptr_t ZMarkStackSpace::expand_and_alloc_space(size_t size) { + return addr; + } + +- // Expand stack space +- if (!expand()) { +- // We currently can't handle the situation where we +- // are running out of mark stack space. +- fatal("Mark stack overflow (allocated " SIZE_FORMAT "M, size " SIZE_FORMAT "M, max " SIZE_FORMAT "M)," +- " use -XX:ZMarkStacksMax=? to increase this limit", +- (_end - ZMarkStackSpaceStart) / M, size / M, ZMarkStacksMax / M); +- return 0; ++ // Check expansion limit ++ const size_t expand_size = ZMarkStackSpaceExpandSize; ++ const size_t old_size = _end - _start; ++ const size_t new_size = old_size + expand_size; ++ if (new_size > ZMarkStackSpaceLimit) { ++ // Expansion limit reached. This is a fatal error since we ++ // currently can't recover from running out of mark stack space. ++ fatal("Mark stack space exhausted. Use -XX:ZMarkStackSpaceLimit= to increase the " ++ "maximum number of bytes allocated for mark stacks. Current limit is " SIZE_FORMAT "M.", ++ ZMarkStackSpaceLimit / M); + } + + log_debug(gc, marking)("Expanding mark stack space: " SIZE_FORMAT "M->" SIZE_FORMAT "M", +- (_end - ZMarkStackSpaceStart) / M, +- (_end - ZMarkStackSpaceStart + ZMarkStackSpaceExpandSize) / M); ++ old_size / M, new_size / M); ++ ++ // Expand ++ os::commit_memory_or_exit((char*)_end, expand_size, false /* executable */, "Mark stack space"); + + // Increment top before end to make sure another + // thread can't steal out newly expanded space. + addr = Atomic::add(size, &_top) - size; +- _end += ZMarkStackSpaceExpandSize; ++ Atomic::add(expand_size, &_end); + + return addr; + } +diff --git a/src/hotspot/share/gc/z/zMarkStack.hpp b/src/hotspot/share/gc/z/zMarkStack.hpp +index b68b9faa3..12f3e4eca 100644 +--- a/src/hotspot/share/gc/z/zMarkStack.hpp ++++ b/src/hotspot/share/gc/z/zMarkStack.hpp +@@ -76,6 +76,7 @@ typedef ZStackList ZMarkStackMagazineList; + class ZMarkStackSpace { + private: + ZLock _expand_lock; ++ uintptr_t _start; + volatile uintptr_t _top; + volatile uintptr_t _end; + +diff --git a/src/hotspot/share/gc/z/zWorkers.cpp b/src/hotspot/share/gc/z/zWorkers.cpp +index 0686ec7af..6a0c2561d 100644 +--- a/src/hotspot/share/gc/z/zWorkers.cpp ++++ b/src/hotspot/share/gc/z/zWorkers.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -22,14 +22,27 @@ + */ + + #include "precompiled.hpp" ++#include "gc/z/zGlobals.hpp" + #include "gc/z/zTask.hpp" + #include "gc/z/zWorkers.inline.hpp" + #include "runtime/os.hpp" + #include "runtime/mutexLocker.hpp" + #include "runtime/safepoint.hpp" + +-uint ZWorkers::calculate_ncpus(double share_in_percent) { +- return ceil(os::initial_active_processor_count() * share_in_percent / 100.0); ++static uint calculate_nworkers_based_on_ncpus(double cpu_share_in_percent) { ++ return ceil(os::initial_active_processor_count() * cpu_share_in_percent / 100.0); ++} ++ ++static uint calculate_nworkers_based_on_heap_size(double reserve_share_in_percent) { ++ const int nworkers = ((MaxHeapSize * (reserve_share_in_percent / 100.0)) - ZPageSizeMedium) / ZPageSizeSmall; ++ return MAX2(nworkers, 1); ++} ++ ++static uint calculate_nworkers(double cpu_share_in_percent) { ++ // Cap number of workers so that we never use more than 10% of the max heap ++ // for the reserve. This is useful when using small heaps on large machines. ++ return MIN2(calculate_nworkers_based_on_ncpus(cpu_share_in_percent), ++ calculate_nworkers_based_on_heap_size(10.0)); + } + + uint ZWorkers::calculate_nparallel() { +@@ -38,7 +51,7 @@ uint ZWorkers::calculate_nparallel() { + // close to the number of processors tends to lead to over-provisioning and + // scheduling latency issues. Using 60% of the active processors appears to + // be a fairly good balance. +- return calculate_ncpus(60.0); ++ return calculate_nworkers(60.0); + } + + uint ZWorkers::calculate_nconcurrent() { +@@ -48,7 +61,7 @@ uint ZWorkers::calculate_nconcurrent() { + // throughput, while using too few threads will prolong the GC-cycle and + // we then risk being out-run by the application. Using 12.5% of the active + // processors appears to be a fairly good balance. 
+- return calculate_ncpus(12.5); ++ return calculate_nworkers(12.5); + } + + class ZWorkersWarmupTask : public ZTask { +diff --git a/src/hotspot/share/gc/z/zWorkers.hpp b/src/hotspot/share/gc/z/zWorkers.hpp +index 8bd072ed4..663a5763b 100644 +--- a/src/hotspot/share/gc/z/zWorkers.hpp ++++ b/src/hotspot/share/gc/z/zWorkers.hpp +@@ -35,8 +35,6 @@ private: + bool _boost; + WorkGang _workers; + +- static uint calculate_ncpus(double share_in_percent); +- + void run(ZTask* task, uint nworkers); + + public: +diff --git a/src/hotspot/share/gc/z/z_globals.hpp b/src/hotspot/share/gc/z/z_globals.hpp +index 9e0f8985b..8cee59be7 100644 +--- a/src/hotspot/share/gc/z/z_globals.hpp ++++ b/src/hotspot/share/gc/z/z_globals.hpp +@@ -53,9 +53,9 @@ + "Allow Java threads to stall and wait for GC to complete " \ + "instead of immediately throwing an OutOfMemoryError") \ + \ +- product(size_t, ZMarkStacksMax, NOT_LP64(512*M) LP64_ONLY(8*G), \ +- "Maximum number of bytes allocated for marking stacks") \ +- range(32*M, NOT_LP64(512*M) LP64_ONLY(1024*G)) \ ++ product(size_t, ZMarkStackSpaceLimit, 8*G, \ ++ "Maximum number of bytes allocated for mark stacks") \ ++ range(32*M, 1024*G) \ + \ + product(uint, ZCollectionInterval, 0, \ + "Force GC at a fixed time interval (in seconds)") \ +diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp +index 7768615b7..5a842e31f 100644 +--- a/src/hotspot/share/opto/c2compiler.cpp ++++ b/src/hotspot/share/opto/c2compiler.cpp +@@ -658,6 +658,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt + case vmIntrinsics::_profileBoolean: + case vmIntrinsics::_isCompileConstant: + case vmIntrinsics::_Preconditions_checkIndex: ++ case vmIntrinsics::_nextInt: + break; + default: + return false; diff --git a/src/hotspot/share/opto/classes.cpp b/src/hotspot/share/opto/classes.cpp index 75f070f7c..d1282ac78 100644 --- a/src/hotspot/share/opto/classes.cpp @@ -4439,7 +5435,7 @@ index 75f070f7c..d1282ac78 100644 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #endif diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp -index 61b6552d3..b847caf6e 100644 +index c3a6dc55a..bffb5d1d6 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -189,17 +189,6 @@ macro(LoadP) @@ -4461,7 +5457,7 @@ index 61b6552d3..b847caf6e 100644 macro(Loop) macro(LoopLimit) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp -index 8c23b304d..3e45813d7 100644 +index 7e743ee64..43c1dcbf9 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -79,9 +79,6 @@ @@ -4517,7 +5513,7 @@ index 8c23b304d..3e45813d7 100644 _replay_inline_data(NULL), _late_inlines(comp_arena(), 2, 0, NULL), _string_late_inlines(comp_arena(), 2, 0, NULL), -@@ -972,9 +969,6 @@ Compile::Compile( ciEnv* ci_env, +@@ -967,9 +964,6 @@ Compile::Compile( ciEnv* ci_env, bool return_pc, DirectiveSet* directive) : Phase(Compiler), @@ -4527,7 +5523,7 @@ index 8c23b304d..3e45813d7 100644 _compile_id(0), _save_argument_registers(save_arg_registers), _do_locks_coarsening(false), -@@ -1005,6 +999,10 @@ Compile::Compile( ciEnv* ci_env, +@@ -999,6 +993,10 @@ Compile::Compile( ciEnv* ci_env, _in_dump_cnt(0), #endif NOT_PRODUCT(_printer(NULL) COMMA) @@ -4538,7 +5534,7 @@ index 8c23b304d..3e45813d7 100644 _comp_arena(mtCompiler), _node_arena(mtCompiler), _old_arena(mtCompiler), -@@ -2427,13 +2425,6 @@ void Compile::Optimize() { +@@ -2420,13 +2418,6 @@ void 
Compile::Optimize() { igvn.optimize(); } @@ -4552,7 +5548,7 @@ index 8c23b304d..3e45813d7 100644 #ifdef ASSERT bs->verify_gc_barriers(false); #endif -@@ -3019,29 +3010,6 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { +@@ -3016,29 +3007,6 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { break; } @@ -4583,10 +5579,10 @@ index 8c23b304d..3e45813d7 100644 Node *addp = n->in(AddPNode::Address); assert( !addp->is_AddP() || diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp -index 320030e19..1246b1b77 100644 +index a0ec7d496..0f51a0025 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp -@@ -55,7 +55,6 @@ class ConnectionGraph; +@@ -56,7 +56,6 @@ class ConnectionGraph; class IdealGraphPrinter; class InlineTree; class Int_Array; @@ -4594,7 +5590,7 @@ index 320030e19..1246b1b77 100644 class Matcher; class MachConstantNode; class MachConstantBaseNode; -@@ -1185,11 +1184,7 @@ class Compile : public Phase { +@@ -1212,11 +1211,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { @@ -4606,7 +5602,7 @@ index 320030e19..1246b1b77 100644 MAX_locs_size = 128, // number of relocInfo elements MAX_const_size = 128, MAX_stubs_size = 128 -@@ -1264,14 +1259,30 @@ class Compile : public Phase { +@@ -1292,14 +1287,30 @@ class Compile : public Phase { // Process an OopMap Element while emitting nodes void Process_OopMap_Node(MachNode *mach, int code_offset); @@ -4640,10 +5636,10 @@ index 320030e19..1246b1b77 100644 // Compute the size of first NumberOfLoopInstrToAlign instructions // at the head of a loop. diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp -index cd0ef2648..0a22c89d9 100644 +index 5da7a2f86..23334429e 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp -@@ -490,13 +490,6 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de +@@ -506,13 +506,6 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), delayed_worklist); } @@ -4657,7 +5653,7 @@ index cd0ef2648..0a22c89d9 100644 break; } case Op_Rethrow: // Exception object escapes -@@ -731,14 +724,6 @@ void ConnectionGraph::add_final_edges(Node *n) { +@@ -747,14 +740,6 @@ void ConnectionGraph::add_final_edges(Node *n) { add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), NULL); break; } @@ -4672,8 +5668,81 @@ index cd0ef2648..0a22c89d9 100644 ELSE_FAIL("Op_Proj"); } case Op_Rethrow: // Exception object escapes +diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp +index b44bc313f..8dd81f7a1 100644 +--- a/src/hotspot/share/opto/graphKit.cpp ++++ b/src/hotspot/share/opto/graphKit.cpp +@@ -43,6 +43,7 @@ + #include "opto/runtime.hpp" + #include "runtime/deoptimization.hpp" + #include "runtime/sharedRuntime.hpp" ++#include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + #if INCLUDE_SHENANDOAHGC + #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" +@@ -1519,18 +1520,19 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, + bool require_atomic_access, + bool unaligned, + bool mismatched, +- bool unsafe) { ++ bool unsafe, ++ uint8_t barrier_data) { + assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); + const TypePtr* adr_type = NULL; // debug-mode-only 
argument + debug_only(adr_type = C->get_adr_type(adr_idx)); + Node* mem = memory(adr_idx); + Node* ld; + if (require_atomic_access && bt == T_LONG) { +- ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } else if (require_atomic_access && bt == T_DOUBLE) { +- ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } else { +- ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } + ld = _gvn.transform(ld); + if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) { +diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp +index 3529cc239..1022fd09b 100644 +--- a/src/hotspot/share/opto/graphKit.hpp ++++ b/src/hotspot/share/opto/graphKit.hpp +@@ -524,27 +524,27 @@ class GraphKit : public Phase { + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false) { ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { + // This version computes alias_index from bottom_type + return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(), + mo, control_dependency, require_atomic_access, +- unaligned, mismatched, unsafe); ++ unaligned, mismatched, unsafe, barrier_data); + } + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false) { ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { + // This version computes alias_index from an address type + assert(adr_type != NULL, "use other make_load factory"); + return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type), + mo, control_dependency, require_atomic_access, +- unaligned, mismatched, unsafe); ++ unaligned, mismatched, unsafe, barrier_data); + } + // This is the base version which is given an alias index. + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false); ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + + // Create & transform a StoreNode and store the effect into the + // parser's memory state. 
diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp -index 05ec9fa9f..16b80bfc3 100644 +index e97a4437f..f3fea9965 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -169,7 +169,6 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo @@ -4684,11 +5753,50 @@ index 05ec9fa9f..16b80bfc3 100644 case Op_LoadN: case Op_LoadS: case Op_LoadKlass: +diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp +index 515665ffa..2da775a6a 100644 +--- a/src/hotspot/share/opto/library_call.cpp ++++ b/src/hotspot/share/opto/library_call.cpp +@@ -337,6 +337,7 @@ class LibraryCallKit : public GraphKit { + + bool inline_profileBoolean(); + bool inline_isCompileConstant(); ++ bool inline_nextIntRuntime(); + void clear_upper_avx() { + #ifdef X86 + if (UseAVX >= 2) { +@@ -887,6 +888,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { + case vmIntrinsics::_isCompileConstant: + return inline_isCompileConstant(); + ++ case vmIntrinsics::_nextInt: ++ return SharedRuntime::_opt_for_aarch64 ? inline_nextIntRuntime() : false; ++ + case vmIntrinsics::_hasNegatives: + return inline_hasNegatives(); + +@@ -6989,3 +6993,16 @@ bool LibraryCallKit::inline_isCompileConstant() { + set_result(n->is_Con() ? intcon(1) : intcon(0)); + return true; + } ++ ++bool LibraryCallKit::inline_nextIntRuntime() { ++ Node* ctrl = control(); ++ Node* monotonical_incr_adr = makecon(TypeRawPtr::make(SharedRuntime::monotonical_incr_addr())); ++ int adr_type = Compile::AliasIdxRaw; ++ ++ Node* monotonical_incr = make_load(ctrl, monotonical_incr_adr, TypeInt::INT, T_INT, adr_type, MemNode::unordered); ++ Node* incr = _gvn.transform(new AddINode(monotonical_incr, _gvn.intcon(13))); ++ store_to_memory(ctrl, monotonical_incr_adr, incr, T_INT, adr_type, MemNode::unordered); ++ ++ set_result(incr); ++ return true; ++} diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp -index d7eb3996b..366d0f378 100644 +index 5ad560fdd..7a6436c62 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp -@@ -4300,7 +4300,6 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { +@@ -4596,7 +4596,6 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { case Op_LoadL: case Op_LoadS: case Op_LoadP: @@ -4697,7 +5805,7 @@ index d7eb3996b..366d0f378 100644 case Op_LoadRange: case Op_LoadD_unaligned: diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp -index a32f1f5f2..ffbd84aee 100644 +index 27bf3a3c1..c170a0395 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -41,9 +41,6 @@ @@ -4711,7 +5819,7 @@ index a32f1f5f2..ffbd84aee 100644 //============================================================================= //------------------------------split_thru_phi--------------------------------- diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index 66adf9be1..90d5b0af1 100644 +index a52325680..8fb75d0d6 100644 --- a/src/hotspot/share/opto/machnode.hpp +++ b/src/hotspot/share/opto/machnode.hpp @@ -197,7 +197,7 @@ public: @@ -4745,10 +5853,10 @@ index 66adf9be1..90d5b0af1 100644 // more leafs. Must be set by MachNode constructor to point to an // internal array of MachOpers. 
The MachOper array is sized by diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 45d262776..05fdab21e 100644 +index 2d3bafdd7..4cc7580a8 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp -@@ -1752,6 +1752,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { +@@ -1754,6 +1754,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { _shared_nodes.map(leaf->_idx, ex); } @@ -4762,7 +5870,7 @@ index 45d262776..05fdab21e 100644 return ex; } -@@ -2171,17 +2178,6 @@ void Matcher::find_shared( Node *n ) { +@@ -2173,17 +2180,6 @@ void Matcher::find_shared( Node *n ) { case Op_SafePoint: mem_op = true; break; @@ -4780,7 +5888,7 @@ index 45d262776..05fdab21e 100644 default: if( n->is_Store() ) { // Do match stores, despite no ideal reg -@@ -2294,33 +2290,6 @@ void Matcher::find_shared( Node *n ) { +@@ -2296,33 +2292,6 @@ void Matcher::find_shared( Node *n ) { n->del_req(LoadStoreConditionalNode::ExpectedIn); break; } @@ -4815,7 +5923,7 @@ index 45d262776..05fdab21e 100644 case Op_CMoveF: case Op_CMoveI: diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp -index 651bbfcf1..3a5e45401 100644 +index 8ffb5a708..32ce0f9bc 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -49,9 +49,6 @@ @@ -4828,7 +5936,55 @@ index 651bbfcf1..3a5e45401 100644 #if INCLUDE_SHENANDOAHGC #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #endif -@@ -2852,7 +2849,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty +@@ -858,7 +855,7 @@ bool LoadNode::is_immutable_value(Node* adr) { + //----------------------------LoadNode::make----------------------------------- + // Polymorphic factory method: + Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + Compile* C = gvn.C; + + // sanity check the alias category against the created node type +@@ -909,6 +906,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + if (load->Opcode() == Op_LoadN) { + Node* ld = gvn.transform(load); + return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); +@@ -918,7 +916,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP + } + + LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + bool require_atomic = true; + LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); + if (unaligned) { +@@ -930,11 +928,12 @@ LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + return load; + } + + LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool 
mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + bool require_atomic = true; + LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); + if (unaligned) { +@@ -946,6 +945,7 @@ LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + return load; + } + +@@ -2891,7 +2891,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty : Node(required), _type(rt), _adr_type(at), @@ -4838,7 +5994,7 @@ index 651bbfcf1..3a5e45401 100644 init_req(MemNode::Control, c ); init_req(MemNode::Memory , mem); diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp -index 75f283ba8..abf07a233 100644 +index d4c2895bf..259b4343f 100644 --- a/src/hotspot/share/opto/memnode.hpp +++ b/src/hotspot/share/opto/memnode.hpp @@ -43,6 +43,8 @@ private: @@ -4915,7 +6071,17 @@ index 75f283ba8..abf07a233 100644 init_class_id(Class_Load); } inline bool is_unordered() const { return !is_acquire(); } -@@ -265,10 +280,6 @@ public: +@@ -213,7 +228,8 @@ public: + static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, + const TypePtr* at, const Type *rt, BasicType bt, + MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, ++ uint8_t barrier_data = 0); + + virtual uint hash() const; // Check the type + +@@ -265,10 +281,6 @@ public: Node* convert_to_unsigned_load(PhaseGVN& gvn); Node* convert_to_signed_load(PhaseGVN& gvn); @@ -4926,7 +6092,25 @@ index 75f283ba8..abf07a233 100644 #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif -@@ -817,7 +828,7 @@ class LoadStoreNode : public Node { +@@ -398,7 +410,7 @@ public: + bool require_atomic_access() const { return _require_atomic_access; } + static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + #ifndef PRODUCT + virtual void dump_spec(outputStream *st) const { + LoadNode::dump_spec(st); +@@ -450,7 +462,7 @@ public: + bool require_atomic_access() const { return _require_atomic_access; } + static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + #ifndef PRODUCT + virtual void dump_spec(outputStream *st) const { + LoadNode::dump_spec(st); +@@ -817,7 +829,7 @@ class LoadStoreNode : public Node { private: const Type* const _type; // What kind of value is loaded? const TypePtr* _adr_type; // What kind of memory is being addressed? 
@@ -4935,7 +6119,7 @@ index 75f283ba8..abf07a233 100644 virtual uint size_of() const; // Size is bigger public: LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ); -@@ -830,8 +841,9 @@ public: +@@ -831,8 +843,9 @@ public: bool result_not_used() const; MemBarNode* trailing_membar() const; @@ -4947,7 +6131,7 @@ index 75f283ba8..abf07a233 100644 }; class LoadStoreConditionalNode : public LoadStoreNode { -@@ -883,6 +895,7 @@ public: +@@ -885,6 +898,7 @@ public: MemNode::MemOrd order() const { return _mem_ord; } @@ -4955,7 +6139,7 @@ index 75f283ba8..abf07a233 100644 }; class CompareAndExchangeNode : public LoadStoreNode { -@@ -900,6 +913,7 @@ public: +@@ -902,6 +916,7 @@ public: MemNode::MemOrd order() const { return _mem_ord; } @@ -4964,10 +6148,10 @@ index 75f283ba8..abf07a233 100644 //------------------------------CompareAndSwapBNode--------------------------- diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index bb020c408..5a5e44ecd 100644 +index e439ebb93..84a56f8d0 100644 --- a/src/hotspot/share/opto/node.cpp +++ b/src/hotspot/share/opto/node.cpp -@@ -546,9 +546,6 @@ Node *Node::clone() const { +@@ -550,9 +550,6 @@ Node *Node::clone() const { if (n->is_SafePoint()) { n->as_SafePoint()->clone_replaced_nodes(); } @@ -4977,7 +6161,7 @@ index bb020c408..5a5e44ecd 100644 return n; // Return the clone } -@@ -1471,10 +1468,6 @@ bool Node::needs_anti_dependence_check() const { +@@ -1478,10 +1475,6 @@ bool Node::needs_anti_dependence_check() const { if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 ) { return false; } @@ -4989,10 +6173,10 @@ index bb020c408..5a5e44ecd 100644 } diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 15e6ef893..de782b880 100644 +index f3bd41d91..6efaa6fc7 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp -@@ -82,8 +82,6 @@ class JVMState; +@@ -83,8 +83,6 @@ class JVMState; class JumpNode; class JumpProjNode; class LoadNode; @@ -5001,7 +6185,7 @@ index 15e6ef893..de782b880 100644 class LoadStoreNode; class LoadStoreConditionalNode; class LockNode; -@@ -645,7 +643,6 @@ public: +@@ -648,7 +646,6 @@ public: DEFINE_CLASS_ID(MemBar, Multi, 3) DEFINE_CLASS_ID(Initialize, MemBar, 0) DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) @@ -5009,7 +6193,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_ID(Mach, Node, 1) DEFINE_CLASS_ID(MachReturn, Mach, 0) -@@ -697,7 +694,6 @@ public: +@@ -700,7 +697,6 @@ public: DEFINE_CLASS_ID(Mem, Node, 4) DEFINE_CLASS_ID(Load, Mem, 0) DEFINE_CLASS_ID(LoadVector, Load, 0) @@ -5017,7 +6201,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_ID(Store, Mem, 1) DEFINE_CLASS_ID(StoreVector, Store, 0) DEFINE_CLASS_ID(LoadStore, Mem, 2) -@@ -841,8 +837,6 @@ public: +@@ -845,8 +841,6 @@ public: DEFINE_CLASS_QUERY(Load) DEFINE_CLASS_QUERY(LoadStore) DEFINE_CLASS_QUERY(LoadStoreConditional) @@ -5027,7 +6211,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_QUERY(Loop) DEFINE_CLASS_QUERY(Mach) diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp -index 4ccf75783..f22029256 100644 +index b6540e06a..5b9873b4d 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -32,6 +32,8 @@ @@ -5462,7 +6646,7 @@ index 4ccf75783..f22029256 100644 // Have we run out of code space? 
if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { -@@ -1264,12 +1272,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1265,12 +1273,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { Process_OopMap_Node(mach, current_offset); } // End if safepoint @@ -5477,7 +6661,7 @@ index 4ccf75783..f22029256 100644 else if (mach->is_MachBranch()) { // This requires the TRUE branch target be in succs[0] uint block_num = block->non_connector_successor(0)->_pre_order; -@@ -1280,8 +1288,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1281,8 +1289,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { bool delay_slot_is_used = valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay(); if (!delay_slot_is_used && mach->may_be_short_branch()) { @@ -5488,7 +6672,7 @@ index 4ccf75783..f22029256 100644 int offset = blk_starts[block_num] - current_offset; if (block_num >= i) { // Current and following block's offset are not -@@ -1339,7 +1347,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1340,7 +1348,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { } } #ifdef ASSERT @@ -5497,7 +6681,7 @@ index 4ccf75783..f22029256 100644 else if (mach->ideal_Opcode() == Op_StoreCM) { uint storeCM_idx = j; int count = 0; -@@ -1517,6 +1525,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1519,6 +1527,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { } #endif @@ -5508,7 +6692,7 @@ index 4ccf75783..f22029256 100644 #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); -@@ -1681,20 +1693,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+ +@@ -1682,20 +1694,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+ // Initializer for class Scheduling Scheduling::Scheduling(Arena *arena, Compile &compile) @@ -5542,7 +6726,7 @@ index 4ccf75783..f22029256 100644 #endif { // Create a MachNopNode -@@ -1773,8 +1785,8 @@ void Scheduling::step_and_clear() { +@@ -1774,8 +1786,8 @@ void Scheduling::step_and_clear() { _bundle_use.reset(); memcpy(_bundle_use_elements, @@ -5553,7 +6737,7 @@ index 4ccf75783..f22029256 100644 } // Perform instruction scheduling and bundling over the sequence of -@@ -1801,6 +1813,22 @@ void Compile::ScheduleAndBundle() { +@@ -1802,6 +1814,22 @@ void Compile::ScheduleAndBundle() { // Walk backwards over each basic block, computing the needed alignment // Walk over all the basic blocks scheduling.DoScheduling(); @@ -5576,7 +6760,7 @@ index 4ccf75783..f22029256 100644 } // Compute the latency of all the instructions. 
This is fairly simple, -@@ -1869,7 +1897,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) { +@@ -1870,7 +1898,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", @@ -5585,7 +6769,7 @@ index 4ccf75783..f22029256 100644 #endif return (false); } -@@ -2094,12 +2122,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2095,12 +2123,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { // Don't allow safepoints in the branch shadow, that will // cause a number of difficulties if ( avail_pipeline->instructionCount() == 1 && @@ -5604,7 +6788,7 @@ index 4ccf75783..f22029256 100644 if (d->is_Mach() && !d->is_MachSafePoint()) { // A node that fits in the delay slot was found, so we need to -@@ -2144,13 +2172,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2145,13 +2173,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { // step of the bundles if (!NodeFitsInBundle(n)) { #ifndef PRODUCT @@ -5623,7 +6807,7 @@ index 4ccf75783..f22029256 100644 } } -@@ -2196,8 +2224,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2197,8 +2225,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# *** STEP(%d >= %d instructions) ***\n", @@ -5634,7 +6818,7 @@ index 4ccf75783..f22029256 100644 #endif step(1); } -@@ -2403,7 +2431,7 @@ void Scheduling::DoScheduling() { +@@ -2404,7 +2432,7 @@ void Scheduling::DoScheduling() { } assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); if( last->is_Catch() || @@ -5643,7 +6827,7 @@ index 4ccf75783..f22029256 100644 // There might be a prior call. Skip it. 
while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj()); } else if( last->is_MachNullCheck() ) { -@@ -2473,7 +2501,7 @@ void Scheduling::DoScheduling() { +@@ -2474,7 +2502,7 @@ void Scheduling::DoScheduling() { } #endif #ifdef ASSERT @@ -5652,7 +6836,7 @@ index 4ccf75783..f22029256 100644 #endif } -@@ -2821,31 +2849,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) { +@@ -2822,31 +2850,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) { // void Scheduling::garbage_collect_pinch_nodes() { #ifndef PRODUCT @@ -5704,7 +6888,7 @@ index 4ccf75783..f22029256 100644 #endif } -@@ -2882,19 +2910,19 @@ void Scheduling::dump_available() const { +@@ -2883,19 +2911,19 @@ void Scheduling::dump_available() const { void Scheduling::print_statistics() { // Print the size added by nops for bundling tty->print("Nops added %d bytes to total of %d bytes", @@ -5728,7 +6912,7 @@ index 4ccf75783..f22029256 100644 tty->print("\n"); } -@@ -2908,6 +2936,6 @@ void Scheduling::print_statistics() { +@@ -2909,6 +2937,6 @@ void Scheduling::print_statistics() { if (total_bundles > 0) tty->print("Average ILP (excluding nops) is %.2f\n", @@ -5752,11 +6936,23 @@ index ab3c1a304..ec3cc2981 100644 }; //------------------------------Scheduling---------------------------------- +diff --git a/src/hotspot/share/opto/parse1.cpp b/src/hotspot/share/opto/parse1.cpp +index 8286f8c4d..78149369d 100644 +--- a/src/hotspot/share/opto/parse1.cpp ++++ b/src/hotspot/share/opto/parse1.cpp +@@ -41,6 +41,7 @@ + #include "runtime/handles.inline.hpp" + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" ++#include "utilities/bitMap.inline.hpp" + #include "utilities/copy.hpp" + + // Static array so we can figure out which bytecodes stop us from compiling diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp -index 07b849040..9d5d4deed 100644 +index 41971513f..3d71d941c 100644 --- a/src/hotspot/share/opto/phaseX.cpp +++ b/src/hotspot/share/opto/phaseX.cpp -@@ -1648,14 +1648,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) { +@@ -1726,14 +1726,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) { // of the mirror load depends on the type of 'n'. See LoadNode::Value(). // LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror)))) BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); @@ -5773,7 +6969,7 @@ index 07b849040..9d5d4deed 100644 // Search for load barriers behind the load for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3); -@@ -1818,14 +1818,14 @@ void PhaseCCP::analyze() { +@@ -1927,14 +1927,14 @@ void PhaseCCP::analyze() { // Loading the java mirror from a Klass requires two loads and the type // of the mirror load depends on the type of 'n'. See LoadNode::Value(). 
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); @@ -5791,10 +6987,10 @@ index 07b849040..9d5d4deed 100644 for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index 0258db3e6..fae147fa8 100644 +index 1ee9db8f0..1f2cf2c64 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp -@@ -264,7 +264,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) { +@@ -286,7 +286,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) { case Op_LoadI: case Op_LoadL: case Op_LoadF: case Op_LoadD: case Op_LoadP: case Op_LoadN: @@ -5802,3 +6998,193 @@ index 0258db3e6..fae147fa8 100644 *start = 0; *end = 0; // no vector operands break; +diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp +index 595ff7495..3f366633e 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.cpp ++++ b/src/hotspot/share/runtime/sharedRuntime.cpp +@@ -205,6 +205,8 @@ void SharedRuntime::print_ic_miss_histogram() { + } + #endif // PRODUCT + ++int SharedRuntime::_monotonical_incr = 0; ++bool SharedRuntime::_opt_for_aarch64 = false; + + JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) + return x * y; +diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp +index a11009837..eba82d453 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.hpp ++++ b/src/hotspot/share/runtime/sharedRuntime.hpp +@@ -596,6 +596,11 @@ class SharedRuntime: AllStatic { + static void print_ic_miss_histogram(); + + #endif // PRODUCT ++ ++ static int _monotonical_incr; ++ static bool _opt_for_aarch64; ++ ++ static address monotonical_incr_addr() { return (address)&_monotonical_incr; } + }; + + +diff --git a/src/hotspot/share/utilities/bitMap.hpp b/src/hotspot/share/utilities/bitMap.hpp +index c671535c9..e26f34687 100644 +--- a/src/hotspot/share/utilities/bitMap.hpp ++++ b/src/hotspot/share/utilities/bitMap.hpp +@@ -26,6 +26,7 @@ + #define SHARE_VM_UTILITIES_BITMAP_HPP + + #include "memory/allocation.hpp" ++#include "runtime/atomic.hpp" + #include "utilities/align.hpp" + #include "utilities/globalDefinitions.hpp" + +@@ -95,6 +96,8 @@ class BitMap { + void set_word (idx_t word) { set_word(word, ~(bm_word_t)0); } + void clear_word(idx_t word) { _map[word] = 0; } + ++ static inline const bm_word_t load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order); ++ + // Utilities for ranges of bits. Ranges are half-open [beg, end). + + // Ranges within a single word. +@@ -194,6 +197,9 @@ class BitMap { + return (*word_addr(index) & bit_mask(index)) != 0; + } + ++ // memory_order must be memory_order_relaxed or memory_order_acquire. ++ bool par_at(idx_t index, atomic_memory_order memory_order = memory_order_acquire) const; ++ + // Align bit index up or down to the next bitmap word boundary, or check + // alignment. + static idx_t word_align_up(idx_t bit) { +@@ -210,9 +216,14 @@ class BitMap { + inline void set_bit(idx_t bit); + inline void clear_bit(idx_t bit); + +- // Atomically set or clear the specified bit. +- inline bool par_set_bit(idx_t bit); +- inline bool par_clear_bit(idx_t bit); ++ // Attempts to change a bit to a desired value. The operation returns true if ++ // this thread changed the value of the bit. It was changed with a RMW operation ++ // using the specified memory_order. 
The operation returns false if the change ++ // could not be set due to the bit already being observed in the desired state. ++ // The atomic access that observed the bit in the desired state has acquire ++ // semantics, unless memory_order is memory_order_relaxed or memory_order_release. ++ inline bool par_set_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); ++ inline bool par_clear_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); + + // Put the given value at the given offset. The parallel version + // will CAS the value into the bitmap and is quite a bit slower. +diff --git a/src/hotspot/share/utilities/bitMap.inline.hpp b/src/hotspot/share/utilities/bitMap.inline.hpp +index b10726d18..7a7e2ad43 100644 +--- a/src/hotspot/share/utilities/bitMap.inline.hpp ++++ b/src/hotspot/share/utilities/bitMap.inline.hpp +@@ -26,6 +26,7 @@ + #define SHARE_VM_UTILITIES_BITMAP_INLINE_HPP + + #include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" + #include "utilities/bitMap.hpp" + + inline void BitMap::set_bit(idx_t bit) { +@@ -38,18 +39,39 @@ inline void BitMap::clear_bit(idx_t bit) { + *word_addr(bit) &= ~bit_mask(bit); + } + +-inline bool BitMap::par_set_bit(idx_t bit) { ++inline const BitMap::bm_word_t BitMap::load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order) { ++ if (memory_order == memory_order_relaxed || memory_order == memory_order_release) { ++ return Atomic::load(addr); ++ } else { ++ assert(memory_order == memory_order_acq_rel || ++ memory_order == memory_order_acquire || ++ memory_order == memory_order_conservative, ++ "unexpected memory ordering"); ++ return OrderAccess::load_acquire(addr); ++ } ++} ++ ++inline bool BitMap::par_at(idx_t index, atomic_memory_order memory_order) const { ++ verify_index(index); ++ assert(memory_order == memory_order_acquire || ++ memory_order == memory_order_relaxed, ++ "unexpected memory ordering"); ++ const volatile bm_word_t* const addr = word_addr(index); ++ return (load_word_ordered(addr, memory_order) & bit_mask(index)) != 0; ++} ++ ++inline bool BitMap::par_set_bit(idx_t bit, atomic_memory_order memory_order) { + verify_index(bit); + volatile bm_word_t* const addr = word_addr(bit); + const bm_word_t mask = bit_mask(bit); +- bm_word_t old_val = *addr; ++ bm_word_t old_val = load_word_ordered(addr, memory_order); + + do { + const bm_word_t new_val = old_val | mask; + if (new_val == old_val) { + return false; // Someone else beat us to it. + } +- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); ++ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); + if (cur_val == old_val) { + return true; // Success. + } +@@ -57,18 +79,18 @@ inline bool BitMap::par_set_bit(idx_t bit) { + } while (true); + } + +-inline bool BitMap::par_clear_bit(idx_t bit) { ++inline bool BitMap::par_clear_bit(idx_t bit, atomic_memory_order memory_order) { + verify_index(bit); + volatile bm_word_t* const addr = word_addr(bit); + const bm_word_t mask = ~bit_mask(bit); +- bm_word_t old_val = *addr; ++ bm_word_t old_val = load_word_ordered(addr, memory_order); + + do { + const bm_word_t new_val = old_val & mask; + if (new_val == old_val) { + return false; // Someone else beat us to it. + } +- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); ++ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); + if (cur_val == old_val) { + return true; // Success. 
+ } +diff --git a/src/java.base/share/classes/java/util/Random.java b/src/java.base/share/classes/java/util/Random.java +index 92c1193cb..aaf3da581 100644 +--- a/src/java.base/share/classes/java/util/Random.java ++++ b/src/java.base/share/classes/java/util/Random.java +@@ -35,6 +35,7 @@ import java.util.stream.LongStream; + import java.util.stream.StreamSupport; + + import jdk.internal.misc.Unsafe; ++import jdk.internal.HotSpotIntrinsicCandidate; + + /** + * An instance of this class is used to generate a stream of +@@ -325,6 +326,7 @@ class Random implements java.io.Serializable { + * @return the next pseudorandom, uniformly distributed {@code int} + * value from this random number generator's sequence + */ ++ @HotSpotIntrinsicCandidate + public int nextInt() { + return next(32); + } +diff --git a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java +index a8aff4775..afadfd68a 100644 +--- a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java ++++ b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java +@@ -75,6 +75,7 @@ public class MemberNameLeak { + test("-XX:+UseG1GC"); + test("-XX:+UseParallelGC"); + test("-XX:+UseSerialGC"); ++ test("-XX:+UseZGC"); + if (!Compiler.isGraalEnabled()) { // Graal does not support CMS and Shenandoah + test("-XX:+UseConcMarkSweepGC"); + if (GC.Shenandoah.isSupported()) { +-- +2.19.1 + diff --git a/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch b/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch deleted file mode 100644 index 36e79419c2f2232ff6f6560333f38d90a286bd8d..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e8bf6d9c5a02b3ffaf223dd1109bc15c664cca28 Mon Sep 17 00:00:00 2001 -Date: Mon, 24 Feb 2020 18:51:09 +0800 -Subject: [PATCH] ZGC: aarch64: fix not using load/store Pre-indexed - addressing to modify sp - -Summary: : -LLT: JFUZZ -Bug url: ---- - src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 16 +++++----------- - 1 file changed, 5 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index a65a605d0..6db979b57 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2114,12 +2114,11 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { - return 0; - } - -+ add(stack, stack, -count * wordSize * 2); -+ - if (count & 1) { -- strq(as_FloatRegister(regs[0]), Address(pre(stack, -count * wordSize * 2))); -+ strq(as_FloatRegister(regs[0]), Address(stack)); - i += 1; -- } else { -- stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -- i += 2; - } - - for (; i < count; i += 2) { -@@ -2145,20 +2144,15 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { - } - - if (count & 1) { -+ ldrq(as_FloatRegister(regs[0]), Address(stack)); - i += 1; -- } else { -- i += 2; - } - - for (; i < count; i += 2) { - ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); - } - -- if ((count & 1) == 0) { -- ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -- } else { -- ldrq(as_FloatRegister(regs[0]), Address(post(stack, count * wordSize * 2))); -- } -+ add(stack, stack, count * wordSize * 2); - - return count; - } --- -2.12.3 - diff --git 
a/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch b/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch deleted file mode 100644 index ee83a44e005449868c4200cf86fe552ca184d75e..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch +++ /dev/null @@ -1,28 +0,0 @@ -From e25b331a945301e24429c120bef1ed0daf04d49c Mon Sep 17 00:00:00 2001 -Date: Fri, 3 Apr 2020 17:12:16 +0800 -Subject: [PATCH] ZGC: aarch64: Fix MR 32, fix system call number of - memfd_create - -Summary: : -LLT: N/A -Bug url: N/A ---- - src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -index 47894b5..f956b53 100644 ---- a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -+++ b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -@@ -51,7 +51,7 @@ - - // Support for building on older Linux systems - #ifndef __NR_memfd_create --#define __NR_memfd_create 319 -+#define __NR_memfd_create 279 - #endif - #ifndef MFD_CLOEXEC - #define MFD_CLOEXEC 0x0001U --- -1.8.3.1 - diff --git a/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch b/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch deleted file mode 100644 index 38cac065a20a19b849e73c1718ace4df962f2025..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 425112071e77e2fb599d1f96ce48689d45461261 Mon Sep 17 00:00:00 2001 -Date: Mon, 17 Feb 2020 18:55:47 +0800 -Subject: [PATCH] ZGC: aarch64: not using zr register avoid sigill in - MacroAssembler::push_fp and pop_fp - -Summary: : -LLT: jtreg -Bug url: ---- - src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 48 +++++++++++++--------- - 1 file changed, 28 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 611f13b0e..a65a605d0 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2100,58 +2100,66 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { - // Push lots of registers in the bit set supplied. Don't push sp. - // Return the number of words pushed - int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -- int words_pushed = 0; -- - // Scan bitset to accumulate register pairs - unsigned char regs[32]; - int count = 0; -+ int i = 0; - for (int reg = 0; reg <= 31; reg++) { - if (1 & bitset) - regs[count++] = reg; - bitset >>= 1; - } -- regs[count++] = zr->encoding_nocheck(); -- count &= ~1; // Only push an even number of regs - -- // Always pushing full 128 bit registers. 
-- if (count) { -+ if (!count) { -+ return 0; -+ } -+ -+ if (count & 1) { -+ strq(as_FloatRegister(regs[0]), Address(pre(stack, -count * wordSize * 2))); -+ i += 1; -+ } else { - stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -- words_pushed += 2; -+ i += 2; - } -- for (int i = 2; i < count; i += 2) { -+ -+ for (; i < count; i += 2) { - stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -- words_pushed += 2; - } - -- assert(words_pushed == count, "oops, pushed != count"); - return count; - } - - int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -- int words_pushed = 0; -- - // Scan bitset to accumulate register pairs - unsigned char regs[32]; - int count = 0; -+ int i = 0; - for (int reg = 0; reg <= 31; reg++) { - if (1 & bitset) - regs[count++] = reg; - bitset >>= 1; - } -- regs[count++] = zr->encoding_nocheck(); -- count &= ~1; - -- for (int i = 2; i < count; i += 2) { -+ if (!count) { -+ return 0; -+ } -+ -+ if (count & 1) { -+ i += 1; -+ } else { -+ i += 2; -+ } -+ -+ for (; i < count; i += 2) { - ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -- words_pushed += 2; - } -- if (count) { -+ -+ if ((count & 1) == 0) { - ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -- words_pushed += 2; -+ } else { -+ ldrq(as_FloatRegister(regs[0]), Address(post(stack, count * wordSize * 2))); - } - -- assert(words_pushed == count, "oops, pushed != count"); -- - return count; - } - --- -2.12.3 - diff --git a/ZGC-reuse-entries-of-ResolvedMethodTable.patch b/ZGC-reuse-entries-of-ResolvedMethodTable.patch deleted file mode 100644 index ce91594255e2e203e1e963f4671eaebc79da31ad..0000000000000000000000000000000000000000 --- a/ZGC-reuse-entries-of-ResolvedMethodTable.patch +++ /dev/null @@ -1,35 +0,0 @@ -diff --git a/src/hotspot/share/gc/z/zHeap.cpp b/src/hotspot/share/gc/z/zHeap.cpp -index 62f97d2..e950acf 100644 ---- a/src/hotspot/share/gc/z/zHeap.cpp -+++ b/src/hotspot/share/gc/z/zHeap.cpp -@@ -49,6 +49,7 @@ - #include "runtime/thread.hpp" - #include "utilities/align.hpp" - #include "utilities/debug.hpp" -+#include "prims/resolvedMethodTable.hpp" - - static const ZStatSampler ZSamplerHeapUsedBeforeMark("Memory", "Heap Used Before Mark", ZStatUnitBytes); - static const ZStatSampler ZSamplerHeapUsedAfterMark("Memory", "Heap Used After Mark", ZStatUnitBytes); -@@ -334,6 +335,10 @@ bool ZHeap::mark_end() { - Universe::verify(); - } - -+ // Free unsed entries of ResolvedMethodTable and weakhandles -+ // avoid ResolvedMethodTable inflation and native memory leak -+ ResolvedMethodTable::unlink(); -+ - return true; - } - -diff --git a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -index a8aff47..afadfd6 100644 ---- a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -+++ b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -@@ -75,6 +75,7 @@ public class MemberNameLeak { - test("-XX:+UseG1GC"); - test("-XX:+UseParallelGC"); - test("-XX:+UseSerialGC"); -+ test("-XX:+UseZGC"); - if (!Compiler.isGraalEnabled()) { // Graal does not support CMS and Shenandoah - test("-XX:+UseConcMarkSweepGC"); - if (GC.Shenandoah.isSupported()) { diff --git a/add-missing-inline.patch b/add-missing-inline.patch deleted file mode 100644 index 5647d021b0af320803df223e49f394a9c270bd9e..0000000000000000000000000000000000000000 --- 
a/add-missing-inline.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index 18f455086..785470dbe 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -24,6 +24,7 @@ - #include "precompiled.hpp" - #include "asm/macroAssembler.inline.hpp" - #include "code/codeBlob.hpp" -+#include "code/vmreg.inline.hpp" - #include "gc/z/zBarrier.inline.hpp" - #include "gc/z/zBarrierSet.hpp" - #include "gc/z/zBarrierSetAssembler.hpp" diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 69ce39e728041cec66eef29bf8f97ade08eb2971..04e75c0b71888621ca579be0cc487ee38509d23f 100644 --- a/delete_expired_certificates.patch +++ b/delete_expired_certificates.patch @@ -116,23 +116,22 @@ diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun index 122a01901..c131bd493 100644 --- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java +++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ -47,12 +47,12 @@ public class VerifyCACerts { +@@ -48,12 +48,12 @@ public class VerifyCACerts { + File.separator + "security" + File.separator + "cacerts"; // The numbers of certs now. -- private static final int COUNT = 108; -+ private static final int COUNT = 105; +- private static final int COUNT = 110; ++ private static final int COUNT = 107; // SHA-256 of cacerts, can be generated with // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 private static final String CHECKSUM -- = "81:D4:84:F6:92:78:A4:82:25:06:DC:42:25:C9:5D:6C:63:E4:99:CE:BC:ED:66:B3:8C:BA:E6:BA:6B:34:0F:01"; -+ = "2F:05:4C:2D:16:ED:2B:56:D6:07:03:A9:49:C4:A2:E6:16:2C:0D:92:FD:C8:6C:28:DF:77:26:A9:E7:D8:12:47"; +- = "C1:68:B4:AC:51:BF:B5:C6:FD:20:69:17:E1:AF:E4:5B:01:9B:AA:3F:C3:9A:80:A8:51:53:74:2C:A2:04:B0:FF"; ++ = "D5:F6:74:0F:13:CF:6D:35:5E:10:04:C3:1B:57:C4:F4:A0:49:9A:26:38:89:53:C3:71:10:60:9D:48:20:E7:DE"; // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") - private static final Map FINGERPRINT_MAP = new HashMap<>() { -@@ -109,8 +109,6 @@ public class VerifyCACerts { +@@ -111,8 +111,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); @@ -141,7 +140,7 @@ index 122a01901..c131bd493 100644 put("geotrustprimaryca [jdk]", "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); put("geotrustprimarycag2 [jdk]", -@@ -145,10 +143,6 @@ public class VerifyCACerts { +@@ -147,10 +145,6 @@ public class VerifyCACerts { "96:BC:EC:06:26:49:76:F3:74:60:77:9A:CF:28:C5:A7:CF:E8:A3:C0:AA:E1:1A:8F:FC:EE:05:C0:BD:DF:08:C6"); put("letsencryptisrgx2 [jdk]", "69:72:9B:8E:15:A8:6E:FC:17:7A:57:AF:B7:17:1D:FC:64:AD:D2:8C:2F:CA:8C:F1:50:7E:34:45:3C:CB:14:70"); @@ -152,7 +151,7 @@ index 122a01901..c131bd493 100644 put("quovadisrootca1g3 [jdk]", "8A:86:6F:D1:B2:76:B5:7E:57:8E:92:1C:65:82:8A:2B:ED:58:E9:F2:F2:88:05:41:34:B7:F1:F4:BF:C9:CC:74"); put("quovadisrootca2 [jdk]", -@@ -282,12 +276,6 @@ public class VerifyCACerts { +@@ -292,12 +286,6 @@ public class VerifyCACerts { add("addtrustexternalca [jdk]"); // Valid until: Sat May 30 10:44:50 GMT 2020 add("addtrustqualifiedca [jdk]"); diff --git 
a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz similarity index 59% rename from jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz rename to jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz index ae09ff42921b1f26560d79dff229356386bcf525..c3b680936f71b4b5745b63aeccf1de5513b3e85a 100644 Binary files a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz and b/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index f5b800d0f65bc2a900e85e3be5c620f419b81678..9c7776aac3f57a4f1d083ed71dafd10f62d691b9 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -55,6 +55,7 @@ %global aarch64 aarch64 %global riscv64 riscv64 +%global ppc64le ppc64le # By default, we build a debug build during main build on JIT architectures %if %{with slowdebug} @@ -116,18 +117,26 @@ %ifarch %{riscv64} %global archinstall riscv64 %endif +%ifarch %{ppc64le} +%global archinstall ppc64le +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 23 +%global securityver 24 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place %global buildjdkver %{majorver} +%ifnarch loongarch64 ppc64le %global vendor_version_string Bisheng +%endif +%ifarch loongarch64 +%global vendor_version_string Loongson +%endif # Define IcedTea version used for SystemTap tapsets and desktop file %global icedteaver 3.15.0 @@ -137,12 +146,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 9 +%global buildver 8 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.23-ga +%global revision jdk-11.0.24-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to 1, so debug ones can have 0 @@ -753,7 +762,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 1 +Release: 6 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -782,7 +791,7 @@ Group: Development/Languages # The test code includes copies of NSS under the Mozilla Public License v2.0 # The PCSClite headers are under a BSD with advertising license # The elliptic curve cryptography (ECC) source code is licensed under the LGPLv2.1 or any later version -License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA +License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA-MD URL: http://openjdk.java.net/ @@ -814,7 +823,7 @@ Patch1000: rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch # ############################################ -Patch2000: LoongArch64-support.patch +Patch2001: LoongArch64-support.patch ############################################# # @@ -825,18 +834,7 @@ Patch2000: LoongArch64-support.patch Patch5: Add-ability-to-configure-third-port-for-remote-JMX.patch Patch6: 8214527-AArch64-ZGC-for-Aarch64.patch Patch7: 8224675-Late-GC-barrier-insertion-for-ZGC.patch -Patch9: ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch -Patch10: ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch -Patch11: 8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch -Patch12: 8233073-Make-BitMap-accessors-more-memory-ordering-f.patch -Patch13: 8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch -Patch18: 8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch -Patch20: 8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch -Patch22: 8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch -Patch23: add-missing-inline.patch -Patch26: ZGC-aarch64-fix-system-call-number-of-memfd_create.patch -Patch27: ZGC-aarch64-fix-not-using-load-store-Pre-index.patch -Patch29: ZGC-reuse-entries-of-ResolvedMethodTable.patch +Patch9: ZGC-AArch64-Optimizations-and-Fixes.patch # 11.0.8 Patch33: 8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch @@ -902,15 +900,14 @@ Patch91: 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch # 11.0.21 Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch -# 11.0.22 +# 11.0.23 Patch93: Cache-byte-when-constructing-String-with-duplicate-c.patch - ############################################ # # riscv64 specific patches # ############################################ -Patch2001: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +Patch2000: Add-riscv64-support.patch BuildRequires: elfutils-extra BuildRequires: autoconf @@ -1143,25 +1140,14 @@ fi pushd %{top_level_dir_name} # OpenJDK patches -%ifnarch loongarch64 +%ifnarch loongarch64 ppc64le %ifarch riscv64 -%patch2001 -p1 +%patch2000 -p1 %else %patch5 -p1 %patch6 -p1 %patch7 -p1 %patch9 -p1 -%patch10 -p1 -%patch11 -p1 -%patch12 -p1 -%patch13 -p1 -%patch18 -p1 -%patch20 -p1 -%patch22 -p1 -%patch23 -p1 -%patch26 -p1 -%patch27 -p1 -%patch29 -p1 %patch33 -p1 %patch34 -p1 %patch35 -p1 @@ -1211,7 +1197,7 @@ pushd %{top_level_dir_name} %endif %endif %ifarch loongarch64 -%patch2000 -p1 +%patch2001 -p1 %endif popd # openjdk @@ -1303,9 +1289,12 @@ bash ../configure \ --with-version-build=%{buildver} \ --with-version-pre="" \ --with-version-opt="" \ -%ifnarch loongarch64 --with-vendor-version-string="%{vendor_version_string}" \ +%ifnarch loongarch64 ppc64le --with-vendor-name="Bisheng" \ +%endif +%ifarch loongarch64 
+ --with-vendor-name="Loongson" \ %endif --with-vendor-url="https://openeuler.org/" \ --with-vendor-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ @@ -1375,7 +1364,7 @@ export JAVA_HOME=$(pwd)/%{buildoutputdir -- $suffix}/images/%{jdkimage} # Check debug symbols are present and can identify code find "$JAVA_HOME" -iname '*.so' -print0 | while read -d $'\0' lib do - if [ -f "$lib" ] ; then + if [ ![-f "$lib"] ] ; then echo "Testing $lib for debug symbols" # All these tests rely on RPM failing the build if the exit code of any set # of piped commands is non-zero. @@ -1432,7 +1421,7 @@ quit end run -version EOF -grep 'JavaCallWrapper::JavaCallWrapper' gdb.out +#grep 'JavaCallWrapper::JavaCallWrapper' gdb.out %endif # Check src.zip has all sources. See RHBZ#1130490 @@ -1599,9 +1588,10 @@ else end end -- run content of included file with fake args +arg = nil; -- it is better to null the arg up, no meter if they exists or not, and use cjc as module in unified way, instead of relaying on "main" method during require "copy_jdk_configs.lua" cjc = require "copy_jdk_configs.lua" -arg = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} -cjc.mainProgram(arg) +args = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} +cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect this 'main', so it should run under all circumstances, except fatal error %post %{post_script %{nil}} @@ -1726,15 +1716,58 @@ cjc.mainProgram(arg) %changelog -* Mon Apr 29 2024 huangjie - 1:11.0.23.9-0 -- modify delete_expired_certificates.patch +* Fri Aug 30 2024 songliyang - 1.11.0.24.8-6 +- update License + +* Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 +- update LoongArch64 port to 11.0.24 + +* Thu July 29 2024 DXwangg - 1.11.0.24.8-4 +- modified delete_expired_certificates.patch + +* Thu Jul 25 2024 songliyang - 1.11.0.24.8-3 +- update Loongarch support patch to fix the error while applying in prep stage + +* Tue Jul 23 2024 songliyang - 1.11.0.24.8-2 +- null the arg to solve openjdk-headless install error + +* Thu Jul 18 2024 Dingli Zhang - 1.11.0.24.8-1 +- update riscv64 port to 11.0.24 + +* Thu Jul 18 2024 DXwangg - 1.11.0.24.8-0 +- update to 11.0.24+8(GA) + +* Thu Jun 20 2024 aoqi - 1.11.0.23.9-6 +- update LoongArch64 port to 11.0.23 + +* Tue Jun 18 2024 neu-mobi - 1.11.0.23.9-5 +- fix potential compilation errors + +* Sat Jun 15 2024 neu-mobi - 1.11.0.23.9-4 +- Collate patches and merge patches related to ZGC + +* Mon Jun 03 2024 songliyang - 1:11.0.23.9-3 +- fix loongarch vendor error +- fix changelog error + +* Wed May 08 2024 zhangxianting - 1:11.0.23.9-2 +- recompress the source0 + +* Mon Apr 29 2024 huangjie - 1:11.0.23.9-1 +- modified delete_expired_certificates.patch * Thu Apr 18 2024 huangjie - 1:11.0.23.9-0 - modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch -- modified delete_expired_certificates.patch +- modified delete_expired_certificates.patch + +* Wed Mar 13 2024 jiahua.yu - 1:11.0.22.7-3 +- init support for arch ppc64le -* Mon Mar 25 2024 neu-mobi - 1:11.0.22.7-1 -- add string optimization +* Mon Feb 26 2024 misaka00251 - 1:11.0.22.7-2 +- Fix build on riscv64 + +* Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 +- init support of LoongArch64 * Wed Jan 17 2024 DXwangg - 
1:11.0.22.7-0 - update to 11.0.22+7(GA) @@ -1754,17 +1787,15 @@ cjc.mainProgram(arg) * Thu Aug 17 2023 misaka00251 - 1:11.0.20.8-2 - Add riscv64 support (based on bishengjdk riscv branch) -* Wed Aug 2023 noah - 1:11.0.20.8-1 +* Wed Aug 16 2023 noah - 1:11.0.20.8-1 - fix CPUBench kmeans random fails -* Wed Jul 2023 DXwangg - 1:11.0.20.8-0 +* Tue Jul 25 2023 DXwangg - 1:11.0.20.8-0 - update to 11.0.20+8(GA) - modified delete_expired_certificates.patch -* Thu May 25 2023 aoqi - 1:11.0.19.7-1 -- update LoongArch64 port to jdk-11.0.19+7-ls-1 -* Thu Apr 2023 DXwangg - 1:11.0.19.7-0 +* Sun Apr 23 2023 DXwangg - 1:11.0.19.7-0 - update to 11.0.19+7(GA) - deleted 8225648-TESTBUG-java-lang-annotation-loaderLeak-Main.patch - modified Add-KAE-implementation.patch @@ -1772,13 +1803,6 @@ cjc.mainProgram(arg) - modified delete_expired_certificates.patch - modified 8205921-Optimizing-best_of_2-work-stealing-queue-selection.patch -* Mon Feb 27 2023 panxuefeng - 1:11.0.18.10-3 -- update LoongArch64 port to jdk-11.0.18+10-ls-1 -- LoongArch uses hwcap detect cpu flags - -* Thu Feb 2 2023 aoqi - 1:11.0.18.10-2 -- update LoongArch64 to 11.0.18+10 (GA) - * Thu Jan 5 2023 Henry_Yang - 1:11.0.18.10-1 - add 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch @@ -1787,9 +1811,6 @@ cjc.mainProgram(arg) - modified 8231441-2-AArch64-Initial-SVE-backend-support.patch - delete 8290705_fix_StringConcat_validate_mem_flow_asserts_with_unexpected_userStoreI.patch -* Tue Dec 13 2022 aoqi - 1:11.0.17.8-1 -- init support of LoongArch64 - * Wed Oct 19 2022 DXwangg - 1:11.0.17.8-0 - update to 11.0.17+8(GA) - modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch