diff --git a/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch b/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch deleted file mode 100644 index dfad78c3f575689a322484d1c66ab08bedeeb50b..0000000000000000000000000000000000000000 --- a/8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch +++ /dev/null @@ -1,184 +0,0 @@ -From 476ec6be3f75c70c50bd1552c624abca098ddba2 Mon Sep 17 00:00:00 2001 -Date: Wed, 18 Mar 2020 10:25:06 +0000 -Subject: [PATCH] 8209375: ZGC: Use dynamic base address for mark stack space - -Summary: : -LLT: jdk11u/test/hotspot/jtreg/vmTestbase/gc/gctests/SoftReference/soft004/soft004.java -Bug url: https://bugs.openjdk.java.net/browse/JDK-8209375 ---- - src/hotspot/share/gc/z/zGlobals.hpp | 7 +--- - src/hotspot/share/gc/z/zMarkStack.cpp | 74 +++++++++++++++-------------------- - src/hotspot/share/gc/z/zMarkStack.hpp | 1 + - src/hotspot/share/gc/z/z_globals.hpp | 6 +-- - 4 files changed, 38 insertions(+), 50 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zGlobals.hpp b/src/hotspot/share/gc/z/zGlobals.hpp -index 080ea5c0e..0f9e9dcb4 100644 ---- a/src/hotspot/share/gc/z/zGlobals.hpp -+++ b/src/hotspot/share/gc/z/zGlobals.hpp -@@ -117,11 +117,8 @@ extern uintptr_t ZAddressWeakBadMask; - // Marked state - extern uintptr_t ZAddressMetadataMarked; - --// Address space for mark stack allocations --const size_t ZMarkStackSpaceSizeShift = 40; // 1TB --const size_t ZMarkStackSpaceSize = (size_t)1 << ZMarkStackSpaceSizeShift; --const uintptr_t ZMarkStackSpaceStart = ZAddressSpaceEnd + ZMarkStackSpaceSize; --const uintptr_t ZMarkStackSpaceEnd = ZMarkStackSpaceStart + ZMarkStackSpaceSize; -+// Mark stack space -+extern uintptr_t ZMarkStackSpaceStart; - const size_t ZMarkStackSpaceExpandSize = (size_t)1 << 25; // 32M - - // Mark stack and magazine sizes -diff --git a/src/hotspot/share/gc/z/zMarkStack.cpp b/src/hotspot/share/gc/z/zMarkStack.cpp -index 52fe51ece..9cc768956 100644 ---- a/src/hotspot/share/gc/z/zMarkStack.cpp -+++ b/src/hotspot/share/gc/z/zMarkStack.cpp -@@ -28,58 +28,44 @@ - #include "gc/z/zMarkStack.inline.hpp" - #include "logging/log.hpp" - #include "runtime/atomic.hpp" -+#include "runtime/os.hpp" - #include "utilities/debug.hpp" - --#include --#include -+uintptr_t ZMarkStackSpaceStart; - - ZMarkStackSpace::ZMarkStackSpace() : - _expand_lock(), -+ _start(0), - _top(0), - _end(0) { -- assert(ZMarkStacksMax >= ZMarkStackSpaceExpandSize, "ZMarkStacksMax too small"); -- assert(ZMarkStacksMax <= ZMarkStackSpaceSize, "ZMarkStacksMax too large"); -- -+ assert(ZMarkStackSpaceLimit >= ZMarkStackSpaceExpandSize, "ZMarkStackSpaceLimit too small"); - // Reserve address space -- const void* res = mmap((void*)ZMarkStackSpaceStart, ZMarkStackSpaceSize, -- PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0); -- if (res != (void*)ZMarkStackSpaceStart) { -- log_error(gc, marking)("Failed to reserve address space for marking stacks"); -+ const size_t size = ZMarkStackSpaceLimit; -+ const size_t alignment = (size_t)os::vm_allocation_granularity(); -+ const uintptr_t addr = (uintptr_t)os::reserve_memory(size, NULL, alignment, mtGC); -+ if (addr == 0) { -+ log_error(gc, marking)("Failed to reserve address space for mark stacks"); - return; - } - - // Successfully initialized -- _top = _end = ZMarkStackSpaceStart; --} -+ _start = _top = _end = addr; - --bool ZMarkStackSpace::is_initialized() const { -- return _top != 0; -+ // Register mark stack space start -+ ZMarkStackSpaceStart = _start; - } - --bool ZMarkStackSpace::expand() { -- const size_t max = 
ZMarkStackSpaceStart + ZMarkStacksMax; -- if (_end + ZMarkStackSpaceExpandSize > max) { -- // Expansion limit reached -- return false; -- } -- -- void* const res = mmap((void*)_end, ZMarkStackSpaceExpandSize, -- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); -- if (res == MAP_FAILED) { -- ZErrno err; -- log_error(gc, marking)("Failed to map memory for marking stacks (%s)", err.to_string()); -- return false; -- } -- -- return true; -+bool ZMarkStackSpace::is_initialized() const { -+ return _start != 0; - } - - uintptr_t ZMarkStackSpace::alloc_space(size_t size) { -- uintptr_t top = _top; -+ uintptr_t top = Atomic::load(&_top); - - for (;;) { -+ const uintptr_t end = Atomic::load(&_end); - const uintptr_t new_top = top + size; -- if (new_top > _end) { -+ if (new_top > end) { - // Not enough space left - return 0; - } -@@ -104,24 +90,28 @@ uintptr_t ZMarkStackSpace::expand_and_alloc_space(size_t size) { - return addr; - } - -- // Expand stack space -- if (!expand()) { -- // We currently can't handle the situation where we -- // are running out of mark stack space. -- fatal("Mark stack overflow (allocated " SIZE_FORMAT "M, size " SIZE_FORMAT "M, max " SIZE_FORMAT "M)," -- " use -XX:ZMarkStacksMax=? to increase this limit", -- (_end - ZMarkStackSpaceStart) / M, size / M, ZMarkStacksMax / M); -- return 0; -+ // Check expansion limit -+ const size_t expand_size = ZMarkStackSpaceExpandSize; -+ const size_t old_size = _end - _start; -+ const size_t new_size = old_size + expand_size; -+ if (new_size > ZMarkStackSpaceLimit) { -+ // Expansion limit reached. This is a fatal error since we -+ // currently can't recover from running out of mark stack space. -+ fatal("Mark stack space exhausted. Use -XX:ZMarkStackSpaceLimit= to increase the " -+ "maximum number of bytes allocated for mark stacks. Current limit is " SIZE_FORMAT "M.", -+ ZMarkStackSpaceLimit / M); - } - - log_debug(gc, marking)("Expanding mark stack space: " SIZE_FORMAT "M->" SIZE_FORMAT "M", -- (_end - ZMarkStackSpaceStart) / M, -- (_end - ZMarkStackSpaceStart + ZMarkStackSpaceExpandSize) / M); -+ old_size / M, new_size / M); -+ -+ // Expand -+ os::commit_memory_or_exit((char*)_end, expand_size, false /* executable */, "Mark stack space"); - - // Increment top before end to make sure another - // thread can't steal out newly expanded space. 
- addr = Atomic::add(size, &_top) - size; -- _end += ZMarkStackSpaceExpandSize; -+ Atomic::add(expand_size, &_end); - - return addr; - } -diff --git a/src/hotspot/share/gc/z/zMarkStack.hpp b/src/hotspot/share/gc/z/zMarkStack.hpp -index b68b9faa3..12f3e4eca 100644 ---- a/src/hotspot/share/gc/z/zMarkStack.hpp -+++ b/src/hotspot/share/gc/z/zMarkStack.hpp -@@ -76,6 +76,7 @@ typedef ZStackList ZMarkStackMagazineList; - class ZMarkStackSpace { - private: - ZLock _expand_lock; -+ uintptr_t _start; - volatile uintptr_t _top; - volatile uintptr_t _end; - -diff --git a/src/hotspot/share/gc/z/z_globals.hpp b/src/hotspot/share/gc/z/z_globals.hpp -index 9e0f8985b..8cee59be7 100644 ---- a/src/hotspot/share/gc/z/z_globals.hpp -+++ b/src/hotspot/share/gc/z/z_globals.hpp -@@ -53,9 +53,9 @@ - "Allow Java threads to stall and wait for GC to complete " \ - "instead of immediately throwing an OutOfMemoryError") \ - \ -- product(size_t, ZMarkStacksMax, NOT_LP64(512*M) LP64_ONLY(8*G), \ -- "Maximum number of bytes allocated for marking stacks") \ -- range(32*M, NOT_LP64(512*M) LP64_ONLY(1024*G)) \ -+ product(size_t, ZMarkStackSpaceLimit, 8*G, \ -+ "Maximum number of bytes allocated for mark stacks") \ -+ range(32*M, 1024*G) \ - \ - product(uint, ZCollectionInterval, 0, \ - "Force GC at a fixed time interval (in seconds)") \ --- -2.12.3 - diff --git a/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch b/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch deleted file mode 100644 index 6ba8ad526b9d5d8380afa4977bfc0d9432eb27b8..0000000000000000000000000000000000000000 --- a/8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 7ca249ae82c6b6c60c524781806f9d12ef3f8f98 Mon Sep 17 00:00:00 2001 -Date: Mon, 16 Mar 2020 16:24:43 +0800 -Subject: [PATCH] 8209894: ZGC: Cap number of GC workers based on heap size - -Summary: : -LLT: jdk11u/test/hotspot/jtreg/vmTestbase/nsk/jdi/ObjectReference/disableCollection/disablecollection002/TestDescription.java -Bug url: https://bugs.openjdk.java.net/browse/JDK-8209894 ---- - src/hotspot/share/gc/z/zWorkers.cpp | 23 ++++++++++++++++++----- - src/hotspot/share/gc/z/zWorkers.hpp | 4 +--- - 2 files changed, 19 insertions(+), 8 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zWorkers.cpp b/src/hotspot/share/gc/z/zWorkers.cpp -index 0686ec7af..6a0c2561d 100644 ---- a/src/hotspot/share/gc/z/zWorkers.cpp -+++ b/src/hotspot/share/gc/z/zWorkers.cpp -@@ -1,5 +1,5 @@ - /* -- * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -22,14 +22,27 @@ - */ - - #include "precompiled.hpp" -+#include "gc/z/zGlobals.hpp" - #include "gc/z/zTask.hpp" - #include "gc/z/zWorkers.inline.hpp" - #include "runtime/os.hpp" - #include "runtime/mutexLocker.hpp" - #include "runtime/safepoint.hpp" - --uint ZWorkers::calculate_ncpus(double share_in_percent) { -- return ceil(os::initial_active_processor_count() * share_in_percent / 100.0); -+static uint calculate_nworkers_based_on_ncpus(double cpu_share_in_percent) { -+ return ceil(os::initial_active_processor_count() * cpu_share_in_percent / 100.0); -+} -+ -+static uint calculate_nworkers_based_on_heap_size(double reserve_share_in_percent) { -+ const int nworkers = ((MaxHeapSize * (reserve_share_in_percent / 100.0)) - ZPageSizeMedium) / ZPageSizeSmall; -+ return MAX2(nworkers, 1); -+} -+ -+static uint calculate_nworkers(double cpu_share_in_percent) { -+ // Cap number of workers so that we never use more than 10% of the max heap -+ // for the reserve. This is useful when using small heaps on large machines. -+ return MIN2(calculate_nworkers_based_on_ncpus(cpu_share_in_percent), -+ calculate_nworkers_based_on_heap_size(10.0)); - } - - uint ZWorkers::calculate_nparallel() { -@@ -38,7 +51,7 @@ uint ZWorkers::calculate_nparallel() { - // close to the number of processors tends to lead to over-provisioning and - // scheduling latency issues. Using 60% of the active processors appears to - // be a fairly good balance. -- return calculate_ncpus(60.0); -+ return calculate_nworkers(60.0); - } - - uint ZWorkers::calculate_nconcurrent() { -@@ -48,7 +61,7 @@ uint ZWorkers::calculate_nconcurrent() { - // throughput, while using too few threads will prolong the GC-cycle and - // we then risk being out-run by the application. Using 12.5% of the active - // processors appears to be a fairly good balance. 
-- return calculate_ncpus(12.5); -+ return calculate_nworkers(12.5); - } - - class ZWorkersWarmupTask : public ZTask { -diff --git a/src/hotspot/share/gc/z/zWorkers.hpp b/src/hotspot/share/gc/z/zWorkers.hpp -index 36a3c61fd..6ce09c447 100644 ---- a/src/hotspot/share/gc/z/zWorkers.hpp -+++ b/src/hotspot/share/gc/z/zWorkers.hpp -@@ -34,8 +34,6 @@ private: - bool _boost; - WorkGang _workers; - -- static uint calculate_ncpus(double share_in_percent); -- - void run(ZTask* task, uint nworkers); - - public: --- -2.12.3 - diff --git a/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch b/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch deleted file mode 100644 index b88b4bfecef68c704a43f77a4050494101550597..0000000000000000000000000000000000000000 --- a/8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch +++ /dev/null @@ -1,906 +0,0 @@ -diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index 687896251..a39640526 100644 ---- a/make/hotspot/gensrc/GensrcAdlc.gmk -+++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -140,6 +140,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) - $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ - ))) - -+ ifeq ($(call check-jvm-feature, zgc), true) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \ -+ ))) -+ endif -+ - ifeq ($(call check-jvm-feature, shenandoahgc), true) - AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ - $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 29f81face..ab578476a 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1128,13 +1128,6 @@ definitions %{ - int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); - %} - --source_hpp %{ -- --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- - //----------SOURCE BLOCK------------------------------------------------------- - // This is a block of C++ code which provides values, functions, and - // definitions necessary in the rest of the architecture description -@@ -18110,243 +18103,6 @@ instruct vpopcount2I(vecD dst, vecD src) %{ - ins_pipe(pipe_class_default); - %} - --source %{ -- --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -- __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(tmp, tmp, ref); -- __ cbnz(tmp, *stub->entry()); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -- __ b(*stub->entry()); -- __ bind(*stub->continuation()); --} -- --%} -- --// Load Pointer --instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -- effect(TEMP dst, KILL cr); -- -- ins_cost(4 * INSN_COST); -- -- format %{ "ldr $dst, $mem" %} -- -- ins_encode %{ -- const Address ref_addr = 
mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -- __ ldr($dst$$Register, ref_addr); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --// Load Weak Pointer --instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -- effect(TEMP dst, KILL cr); -- -- ins_cost(4 * INSN_COST); -- -- format %{ "ldr $dst, $mem" %} -- -- ins_encode %{ -- const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -- __ ldr($dst$$Register, ref_addr); -- z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -- %} -- -- ins_pipe(iload_reg_mem); --%} -- --// Load Pointer Volatile --instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) --%{ -- match(Set dst (LoadP mem)); -- predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP dst, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "ldar $dst, $mem\t" %} -- -- ins_encode %{ -- __ ldar($dst$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval\n\t" -- "cset $res, EQ" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, rscratch2); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(KILL cr, TEMP_DEF res); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $mem, $oldval, $newval\n\t" -- "cset $res, EQ" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && 
$mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, rscratch2); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, rscratch2); -- __ cset($res$$Register, Assembler::EQ); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, $res$$Register); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- false /* acquire */, true /* release */, false /* weak */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -- match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF res, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -- -- ins_encode %{ -- guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, $res$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -- __ andr(rscratch1, rscratch1, $res$$Register); -- __ cbz(rscratch1, good); -- z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -- __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -- true /* acquire */, true /* release */, false /* weak */, $res$$Register); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_slow); --%} -- --instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && !needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(2 * VOLATILE_REF_COST); -- -- format %{ "atomic_xchg $prev, $newv, [$mem]" %} -- -- ins_encode %{ -- __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_serial); --%} -- --instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -- match(Set prev (GetAndSetP mem newv)); -- predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -- effect(TEMP_DEF prev, KILL cr); -- -- ins_cost(VOLATILE_REF_COST); -- -- format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -- -- ins_encode %{ -- __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -- } -- %} -- ins_pipe(pipe_serial); --%} - - //----------PEEPHOLE RULES----------------------------------------------------- - // These must follow all instruction definitions as they use the names -diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad -new file mode 100644 -index 000000000..50cc6f924 ---- /dev/null -+++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad -@@ -0,0 +1,268 @@ -+// -+// Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+ -+source_hpp %{ -+ -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+ -+%} -+ -+source %{ -+ -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ cbnz(tmp, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ b(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Pointer Volatile -+instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "ldar $dst, $mem\t" %} -+ -+ ins_encode %{ -+ __ ldar($dst$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak 
*/, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ ins_pipe(pipe_serial); -+%} -+ -diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad -new file mode 100644 -index 000000000..38c2e926b ---- /dev/null -+++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad -@@ -0,0 +1,168 @@ -+// -+// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+ -+source_hpp %{ -+ -+#include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" -+ -+%} -+ -+source %{ -+ -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::notZero, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ jmp(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+%} -+ -+// Load Pointer -+instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); -+ -+ ins_cost(125); -+ -+ format %{ "movq $dst, $mem" %} -+ -+ ins_encode %{ -+ __ movptr($dst$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(ialu_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); -+ -+ ins_cost(125); -+ -+ format %{ "movq $dst, $mem" %} -+ -+ ins_encode %{ -+ __ movptr($dst$$Register, $mem$$Address); -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(ialu_reg_mem); -+%} -+ -+instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem" %} -+ -+ ins_encode %{ -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -+instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, KILL oldval, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem\n\t" -+ "sete $res\n\t" -+ "movzbl $res, $res" %} -+ -+ ins_encode %{ -+ if 
(barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ __ cmpptr($tmp$$Register, $oldval$$Register); -+ } -+ __ setb(Assembler::equal, $res$$Register); -+ __ movzbl($res$$Register, $res$$Register); -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -+instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -+ match(Set newval (GetAndSetP mem newval)); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr); -+ -+ format %{ "xchgq $newval, $mem" %} -+ -+ ins_encode %{ -+ __ xchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_cmpxchg); -+%} -+ -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 95a8538f3..ede4d8864 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -538,19 +538,6 @@ reg_class int_rdi_reg(RDI); - - %} - --source_hpp %{ -- --#include "gc/z/c2/zBarrierSetC2.hpp" --#include "gc/z/zThreadLocalData.hpp" -- --%} -- --source_hpp %{ --#if INCLUDE_ZGC --#include "gc/z/zBarrierSetAssembler.hpp" --#endif --%} -- - //----------SOURCE BLOCK------------------------------------------------------- - // This is a block of C++ code which provides values, functions, and - // definitions necessary in the rest of the architecture description -@@ -1882,19 +1869,6 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); - } - --static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -- __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::notZero, *stub->entry()); -- __ bind(*stub->continuation()); --} -- --static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -- ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -- __ jmp(*stub->entry()); -- __ bind(*stub->continuation()); --} -- - %} - - //----------ENCODING BLOCK----------------------------------------------------- -@@ -12845,131 +12819,6 @@ instruct RethrowException() - ins_pipe(pipe_jmp); - %} - --// --// Execute ZGC load barrier (strong) slow path --// -- --// Load Pointer --instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) --%{ -- predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -- match(Set dst (LoadP mem)); -- effect(KILL cr, TEMP dst); -- -- ins_cost(125); -- -- format %{ "movq $dst, $mem" %} -- -- ins_encode %{ -- __ movptr($dst$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(ialu_reg_mem); --%} 
-- --// Load Weak Pointer --instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) --%{ -- predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -- match(Set dst (LoadP mem)); -- effect(KILL cr, TEMP dst); -- -- ins_cost(125); -- -- format %{ "movq $dst, $mem" %} -- ins_encode %{ -- __ movptr($dst$$Register, $mem$$Address); -- z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); -- %} -- -- ins_pipe(ialu_reg_mem); --%} -- --instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -- match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, TEMP tmp); -- -- format %{ "lock\n\t" -- "cmpxchgq $newval, $mem" %} -- -- ins_encode %{ -- if (barrier_data() != ZLoadBarrierElided) { -- __ movptr($tmp$$Register, $oldval$$Register); -- } -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::zero, good); -- z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -- __ movptr($oldval$$Register, $tmp$$Register); -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- __ bind(good); -- } -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- -- --instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -- match(Set res (CompareAndSwapP mem (Binary oldval newval))); -- match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr, KILL oldval, TEMP tmp); -- -- format %{ "lock\n\t" -- "cmpxchgq $newval, $mem\n\t" -- "sete $res\n\t" -- "movzbl $res, $res" %} -- -- ins_encode %{ -- if (barrier_data() != ZLoadBarrierElided) { -- __ movptr($tmp$$Register, $oldval$$Register); -- } -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- Label good; -- __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -- __ jcc(Assembler::zero, good); -- z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -- __ movptr($oldval$$Register, $tmp$$Register); -- __ lock(); -- __ cmpxchgptr($newval$$Register, $mem$$Address); -- __ bind(good); -- __ cmpptr($tmp$$Register, $oldval$$Register); -- } -- __ setb(Assembler::equal, $res$$Register); -- __ movzbl($res$$Register, $res$$Register); -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- --instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -- match(Set newval (GetAndSetP mem newval)); -- predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -- effect(KILL cr); -- -- format %{ "xchgq $newval, $mem" %} -- -- ins_encode %{ -- __ xchgptr($newval$$Register, $mem$$Address); -- if (barrier_data() != ZLoadBarrierElided) { -- z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -- } -- %} -- -- ins_pipe(pipe_cmpxchg); --%} -- - // ============================================================================ - // This name is KNOWN by the ADLC and cannot be changed. 
- // The ADLC forces a 'TypeRawPtr::BOTTOM' output type --- -2.19.0 - diff --git a/8231441-1-AArch64-Initial-SVE-backend-support.patch b/8231441-1-AArch64-Initial-SVE-backend-support.patch index 183a8c91e2659a6adef92648234ba4b1a9402dfd..cb002807c51cc28000a84c3fcf5f022c1ec9f282 100755 --- a/8231441-1-AArch64-Initial-SVE-backend-support.patch +++ b/8231441-1-AArch64-Initial-SVE-backend-support.patch @@ -308,8 +308,8 @@ index 643e3d564..82e615241 100644 static int cpu_revision() { return _revision; } + static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; - static bool is_zva_enabled() { return 0 <= _zva_length; } - static int zva_length() { + static bool is_hisi_enabled() { + if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03)) { diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java new file mode 100644 index 000000000..dc15ca800 diff --git a/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch b/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch deleted file mode 100644 index a851739d97fdc092a60bde940517b6971e27ed4d..0000000000000000000000000000000000000000 --- a/8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch +++ /dev/null @@ -1,101 +0,0 @@ -From d2137837d518a8bdb8e075109e502e78bd2f9fa9 Mon Sep 17 00:00:00 2001 -Date: Wed, 19 Feb 2020 17:36:32 +0800 -Subject: [PATCH] 8233061: ZGC: Enforce memory ordering in segmented bit maps - -Summary: : -LLT: renaissance -Bug url: https://bugs.openjdk.java.net/browse/JDK-8233061 ---- - src/hotspot/share/gc/z/zLiveMap.cpp | 20 +++++++++----------- - src/hotspot/share/gc/z/zLiveMap.inline.hpp | 9 +++++---- - 2 files changed, 14 insertions(+), 15 deletions(-) - -diff --git a/src/hotspot/share/gc/z/zLiveMap.cpp b/src/hotspot/share/gc/z/zLiveMap.cpp -index 7187b6166..c1d79b794 100644 ---- a/src/hotspot/share/gc/z/zLiveMap.cpp -+++ b/src/hotspot/share/gc/z/zLiveMap.cpp -@@ -50,7 +50,9 @@ void ZLiveMap::reset(size_t index) { - - // Multiple threads can enter here, make sure only one of them - // resets the marking information while the others busy wait. -- for (uint32_t seqnum = _seqnum; seqnum != ZGlobalSeqNum; seqnum = _seqnum) { -+ for (uint32_t seqnum = OrderAccess::load_acquire(&_seqnum); -+ seqnum != ZGlobalSeqNum; -+ seqnum = OrderAccess::load_acquire(&_seqnum)) { - if ((seqnum != seqnum_initializing) && - (Atomic::cmpxchg(seqnum_initializing, &_seqnum, seqnum) == seqnum)) { - // Reset marking information -@@ -61,13 +63,13 @@ void ZLiveMap::reset(size_t index) { - segment_live_bits().clear(); - segment_claim_bits().clear(); - -- // Make sure the newly reset marking information is -- // globally visible before updating the page seqnum. -- OrderAccess::storestore(); -- -- // Update seqnum - assert(_seqnum == seqnum_initializing, "Invalid"); -- _seqnum = ZGlobalSeqNum; -+ -+ // Make sure the newly reset marking information is ordered -+ // before the update of the page seqnum, such that when the -+ // up-to-date seqnum is load acquired, the bit maps will not -+ // contain stale information. -+ OrderAccess::release_store(&_seqnum, ZGlobalSeqNum); - break; - } - -@@ -89,10 +91,6 @@ void ZLiveMap::reset_segment(BitMap::idx_t segment) { - if (!claim_segment(segment)) { - // Already claimed, wait for live bit to be set - while (!is_segment_live(segment)) { -- // Busy wait. The loadload barrier is needed to make -- // sure we re-read the live bit every time we loop. 
-- OrderAccess::loadload(); -- - // Mark reset contention - if (!contention) { - // Count contention once -diff --git a/src/hotspot/share/gc/z/zLiveMap.inline.hpp b/src/hotspot/share/gc/z/zLiveMap.inline.hpp -index 1e4d56f41..fb45a892c 100644 ---- a/src/hotspot/share/gc/z/zLiveMap.inline.hpp -+++ b/src/hotspot/share/gc/z/zLiveMap.inline.hpp -@@ -30,6 +30,7 @@ - #include "gc/z/zOop.inline.hpp" - #include "gc/z/zUtils.inline.hpp" - #include "runtime/atomic.hpp" -+#include "runtime/orderAccess.hpp" - #include "utilities/bitMap.inline.hpp" - #include "utilities/debug.hpp" - -@@ -38,7 +39,7 @@ inline void ZLiveMap::reset() { - } - - inline bool ZLiveMap::is_marked() const { -- return _seqnum == ZGlobalSeqNum; -+ return OrderAccess::load_acquire(&_seqnum) == ZGlobalSeqNum; - } - - inline uint32_t ZLiveMap::live_objects() const { -@@ -68,15 +69,15 @@ inline BitMapView ZLiveMap::segment_claim_bits() { - } - - inline bool ZLiveMap::is_segment_live(BitMap::idx_t segment) const { -- return segment_live_bits().at(segment); -+ return segment_live_bits().par_at(segment); - } - - inline bool ZLiveMap::set_segment_live_atomic(BitMap::idx_t segment) { -- return segment_live_bits().par_set_bit(segment); -+ return segment_live_bits().par_set_bit(segment, memory_order_release); - } - - inline bool ZLiveMap::claim_segment(BitMap::idx_t segment) { -- return segment_claim_bits().par_set_bit(segment); -+ return segment_claim_bits().par_set_bit(segment, memory_order_acq_rel); - } - - inline BitMap::idx_t ZLiveMap::first_live_segment() const { --- -2.12.3 - diff --git a/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch b/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch deleted file mode 100644 index 5e1f19e24c3be12d23568387071d29b8bd473c03..0000000000000000000000000000000000000000 --- a/8233073-Make-BitMap-accessors-more-memory-ordering-f.patch +++ /dev/null @@ -1,162 +0,0 @@ -diff --git a/src/hotspot/share/c1/c1_Instruction.cpp b/src/hotspot/share/c1/c1_Instruction.cpp -index ee3be89..62d8b48 100644 ---- a/src/hotspot/share/c1/c1_Instruction.cpp -+++ b/src/hotspot/share/c1/c1_Instruction.cpp -@@ -29,6 +29,7 @@ - #include "c1/c1_ValueStack.hpp" - #include "ci/ciObjArrayKlass.hpp" - #include "ci/ciTypeArrayKlass.hpp" -+#include "utilities/bitMap.inline.hpp" - - - // Implementation of Instruction -diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp -index bf9179f..e0696de 100644 ---- a/src/hotspot/share/opto/graphKit.cpp -+++ b/src/hotspot/share/opto/graphKit.cpp -@@ -43,6 +43,7 @@ - #include "opto/runtime.hpp" - #include "runtime/deoptimization.hpp" - #include "runtime/sharedRuntime.hpp" -+#include "utilities/bitMap.inline.hpp" - #include "utilities/macros.hpp" - #if INCLUDE_SHENANDOAHGC - #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" -diff --git a/src/hotspot/share/opto/parse1.cpp b/src/hotspot/share/opto/parse1.cpp -index 99b1a67..f94f028 100644 ---- a/src/hotspot/share/opto/parse1.cpp -+++ b/src/hotspot/share/opto/parse1.cpp -@@ -41,6 +41,7 @@ - #include "runtime/handles.inline.hpp" - #include "runtime/safepointMechanism.hpp" - #include "runtime/sharedRuntime.hpp" -+#include "utilities/bitMap.inline.hpp" - #include "utilities/copy.hpp" - - // Static array so we can figure out which bytecodes stop us from compiling -diff --git a/src/hotspot/share/utilities/bitMap.hpp b/src/hotspot/share/utilities/bitMap.hpp -index c671535..e26f346 100644 ---- a/src/hotspot/share/utilities/bitMap.hpp -+++ b/src/hotspot/share/utilities/bitMap.hpp -@@ -26,6 +26,7 @@ - 
#define SHARE_VM_UTILITIES_BITMAP_HPP - - #include "memory/allocation.hpp" -+#include "runtime/atomic.hpp" - #include "utilities/align.hpp" - #include "utilities/globalDefinitions.hpp" - -@@ -95,6 +96,8 @@ class BitMap { - void set_word (idx_t word) { set_word(word, ~(bm_word_t)0); } - void clear_word(idx_t word) { _map[word] = 0; } - -+ static inline const bm_word_t load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order); -+ - // Utilities for ranges of bits. Ranges are half-open [beg, end). - - // Ranges within a single word. -@@ -194,6 +197,9 @@ class BitMap { - return (*word_addr(index) & bit_mask(index)) != 0; - } - -+ // memory_order must be memory_order_relaxed or memory_order_acquire. -+ bool par_at(idx_t index, atomic_memory_order memory_order = memory_order_acquire) const; -+ - // Align bit index up or down to the next bitmap word boundary, or check - // alignment. - static idx_t word_align_up(idx_t bit) { -@@ -210,9 +216,14 @@ class BitMap { - inline void set_bit(idx_t bit); - inline void clear_bit(idx_t bit); - -- // Atomically set or clear the specified bit. -- inline bool par_set_bit(idx_t bit); -- inline bool par_clear_bit(idx_t bit); -+ // Attempts to change a bit to a desired value. The operation returns true if -+ // this thread changed the value of the bit. It was changed with a RMW operation -+ // using the specified memory_order. The operation returns false if the change -+ // could not be set due to the bit already being observed in the desired state. -+ // The atomic access that observed the bit in the desired state has acquire -+ // semantics, unless memory_order is memory_order_relaxed or memory_order_release. -+ inline bool par_set_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); -+ inline bool par_clear_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); - - // Put the given value at the given offset. The parallel version - // will CAS the value into the bitmap and is quite a bit slower. 
-diff --git a/src/hotspot/share/utilities/bitMap.inline.hpp b/src/hotspot/share/utilities/bitMap.inline.hpp -index b10726d..7a7e2ad 100644 ---- a/src/hotspot/share/utilities/bitMap.inline.hpp -+++ b/src/hotspot/share/utilities/bitMap.inline.hpp -@@ -26,6 +26,7 @@ - #define SHARE_VM_UTILITIES_BITMAP_INLINE_HPP - - #include "runtime/atomic.hpp" -+#include "runtime/orderAccess.hpp" - #include "utilities/bitMap.hpp" - - inline void BitMap::set_bit(idx_t bit) { -@@ -38,18 +39,39 @@ inline void BitMap::clear_bit(idx_t bit) { - *word_addr(bit) &= ~bit_mask(bit); - } - --inline bool BitMap::par_set_bit(idx_t bit) { -+inline const BitMap::bm_word_t BitMap::load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order) { -+ if (memory_order == memory_order_relaxed || memory_order == memory_order_release) { -+ return Atomic::load(addr); -+ } else { -+ assert(memory_order == memory_order_acq_rel || -+ memory_order == memory_order_acquire || -+ memory_order == memory_order_conservative, -+ "unexpected memory ordering"); -+ return OrderAccess::load_acquire(addr); -+ } -+} -+ -+inline bool BitMap::par_at(idx_t index, atomic_memory_order memory_order) const { -+ verify_index(index); -+ assert(memory_order == memory_order_acquire || -+ memory_order == memory_order_relaxed, -+ "unexpected memory ordering"); -+ const volatile bm_word_t* const addr = word_addr(index); -+ return (load_word_ordered(addr, memory_order) & bit_mask(index)) != 0; -+} -+ -+inline bool BitMap::par_set_bit(idx_t bit, atomic_memory_order memory_order) { - verify_index(bit); - volatile bm_word_t* const addr = word_addr(bit); - const bm_word_t mask = bit_mask(bit); -- bm_word_t old_val = *addr; -+ bm_word_t old_val = load_word_ordered(addr, memory_order); - - do { - const bm_word_t new_val = old_val | mask; - if (new_val == old_val) { - return false; // Someone else beat us to it. - } -- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); -+ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); - if (cur_val == old_val) { - return true; // Success. - } -@@ -57,18 +79,18 @@ inline bool BitMap::par_set_bit(idx_t bit) { - } while (true); - } - --inline bool BitMap::par_clear_bit(idx_t bit) { -+inline bool BitMap::par_clear_bit(idx_t bit, atomic_memory_order memory_order) { - verify_index(bit); - volatile bm_word_t* const addr = word_addr(bit); - const bm_word_t mask = ~bit_mask(bit); -- bm_word_t old_val = *addr; -+ bm_word_t old_val = load_word_ordered(addr, memory_order); - - do { - const bm_word_t new_val = old_val & mask; - if (new_val == old_val) { - return false; // Someone else beat us to it. - } -- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); -+ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); - if (cur_val == old_val) { - return true; // Success. 
- } diff --git a/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch b/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch deleted file mode 100644 index 9c07ceaf73973bbf9d50ca88435f9de01d43c2bf..0000000000000000000000000000000000000000 --- a/8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch +++ /dev/null @@ -1,472 +0,0 @@ -From aa824cddc917b1fcac41a0efe5e8c794f2d5cff9 Mon Sep 17 00:00:00 2001 -Date: Thu, 26 Mar 2020 16:17:45 +0000 -Subject: [PATCH] 8233506:ZGC: the load for Reference.get() can be converted to - a load for strong refs Summary: : LLT: JDK8233506 - Bug url: https://bugs.openjdk.java.net/browse/JDK-8233506 - ---- - src/hotspot/share/gc/shared/c2/barrierSetC2.cpp | 73 +++++++++++++++---------- - src/hotspot/share/gc/shared/c2/barrierSetC2.hpp | 7 ++- - src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp | 42 +++++--------- - src/hotspot/share/opto/graphKit.cpp | 9 +-- - src/hotspot/share/opto/graphKit.hpp | 10 ++-- - src/hotspot/share/opto/memnode.cpp | 9 ++- - src/hotspot/share/opto/memnode.hpp | 7 ++- - 7 files changed, 85 insertions(+), 72 deletions(-) - -diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -index 545275644..48fe04b08 100644 ---- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp -@@ -115,10 +115,13 @@ Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) con - - Node* load; - if (in_native) { -- load = kit->make_load(control, adr, val_type, access.type(), mo); -+ load = kit->make_load(control, adr, val_type, access.type(), mo, dep, -+ requires_atomic_access, unaligned, -+ mismatched, unsafe, access.barrier_data()); - } else { - load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, -- dep, requires_atomic_access, unaligned, mismatched, unsafe); -+ dep, requires_atomic_access, unaligned, mismatched, unsafe, -+ access.barrier_data()); - } - - access.set_raw_access(load); -@@ -348,28 +351,28 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); -- load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); -+ load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo); - } else - #endif - { -- load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); -+ load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo); - } - } else { - switch (access.type()) { - case T_BYTE: { -- load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - case T_SHORT: { -- load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, 
adr_type, mo); - break; - } - case T_INT: { -- load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - case T_LONG: { -- load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); -+ load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); - break; - } - default: -@@ -377,6 +380,9 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -405,50 +411,50 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node - Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); - Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); -+ load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); -+ load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); - } - } else - #endif - { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - } - } else { - switch(access.type()) { - case T_BYTE: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_SHORT: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_INT: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, 
new_val, expected_val, mo)); -+ load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } - case T_LONG: { - if (is_weak_cas) { -- load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); - } else { -- load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); -+ load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); - } - break; - } -@@ -457,6 +463,9 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -478,27 +487,30 @@ Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_va - } else - #endif - { -- load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr())); -+ load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()); - } - } else { - switch (access.type()) { - case T_BYTE: -- load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_SHORT: -- load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_INT: -- load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_LONG: -- load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type); - break; - default: - ShouldNotReachHere(); - } - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - access.set_raw_access(load_store); - pin_atomic_op(access); - -@@ -520,21 +532,24 @@ Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val - - switch(access.type()) { - case T_BYTE: -- load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_SHORT: -- load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_INT: -- load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type); - break; - case T_LONG: -- load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type)); -+ load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type); - break; - default: - ShouldNotReachHere(); - } - -+ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); -+ load_store = kit->gvn().transform(load_store); -+ - 
access.set_raw_access(load_store); - pin_atomic_op(access); - -diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -index 487988bd8..8b4be7d11 100644 ---- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -+++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -@@ -96,6 +96,7 @@ protected: - Node* _base; - C2AccessValuePtr& _addr; - Node* _raw_access; -+ uint8_t _barrier_data; - - void fixup_decorators(); - void* barrier_set_state() const; -@@ -108,7 +109,8 @@ public: - _type(type), - _base(base), - _addr(addr), -- _raw_access(NULL) -+ _raw_access(NULL), -+ _barrier_data(0) - { - fixup_decorators(); - } -@@ -122,6 +124,9 @@ public: - bool is_raw() const { return (_decorators & AS_RAW) != 0; } - Node* raw_access() const { return _raw_access; } - -+ uint8_t barrier_data() const { return _barrier_data; } -+ void set_barrier_data(uint8_t data) { _barrier_data = data; } -+ - void set_raw_access(Node* raw_access) { _raw_access = raw_access; } - virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses. - -diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -index a12973464..e178761a0 100644 ---- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -+++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -@@ -174,48 +174,36 @@ int ZBarrierSetC2::estimate_stub_size() const { - return size; - } - --static bool barrier_needed(C2Access access) { -- return ZBarrierSet::barrier_needed(access.decorators(), access.type()); --} -- --Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { -- Node* result = BarrierSetC2::load_at_resolved(access, val_type); -- if (barrier_needed(access) && access.raw_access()->is_Mem()) { -- if ((access.decorators() & ON_WEAK_OOP_REF) != 0) { -- access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak); -+static void set_barrier_data(C2Access& access) { -+ if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) { -+ if (access.decorators() & ON_WEAK_OOP_REF) { -+ access.set_barrier_data(ZLoadBarrierWeak); - } else { -- access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong); -+ access.set_barrier_data(ZLoadBarrierStrong); - } - } -+} - -- return result; -+Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { -+ set_barrier_data(access); -+ return BarrierSetC2::load_at_resolved(access, val_type); - } - - Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, - Node* new_val, const Type* val_type) const { -- Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - } - - Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, - Node* new_val, const Type* value_type) const { -- Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -- -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - } - - Node* 
ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const { -- Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); -- if (barrier_needed(access)) { -- access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); -- } -- return result; -+ set_barrier_data(access); -+ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - } - - bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { -diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp -index 7bf2f6cfb..a1547b42f 100644 ---- a/src/hotspot/share/opto/graphKit.cpp -+++ b/src/hotspot/share/opto/graphKit.cpp -@@ -1493,18 +1493,19 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, - bool require_atomic_access, - bool unaligned, - bool mismatched, -- bool unsafe) { -+ bool unsafe, -+ uint8_t barrier_data) { - assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); - const TypePtr* adr_type = NULL; // debug-mode-only argument - debug_only(adr_type = C->get_adr_type(adr_idx)); - Node* mem = memory(adr_idx); - Node* ld; - if (require_atomic_access && bt == T_LONG) { -- ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } else if (require_atomic_access && bt == T_DOUBLE) { -- ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } else { -- ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe); -+ ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); - } - ld = _gvn.transform(ld); - if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) { -diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp -index 07c20bbd5..df5d18ccc 100644 ---- a/src/hotspot/share/opto/graphKit.hpp -+++ b/src/hotspot/share/opto/graphKit.hpp -@@ -518,27 +518,27 @@ class GraphKit : public Phase { - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false) { -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { - // This version computes alias_index from bottom_type - return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(), - mo, control_dependency, require_atomic_access, -- unaligned, mismatched, unsafe); -+ unaligned, mismatched, unsafe, barrier_data); - } - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false) { -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { - // This version computes alias_index from an address type - assert(adr_type != NULL, "use other make_load factory"); - return make_load(ctl, 
adr, t, bt, C->get_alias_index(adr_type), - mo, control_dependency, require_atomic_access, -- unaligned, mismatched, unsafe); -+ unaligned, mismatched, unsafe, barrier_data); - } - // This is the base version which is given an alias index. - Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, - MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, - bool require_atomic_access = false, bool unaligned = false, -- bool mismatched = false, bool unsafe = false); -+ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - - // Create & transform a StoreNode and store the effect into the - // parser's memory state. -diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp -index ee0f09e11..ff0a5726c 100644 ---- a/src/hotspot/share/opto/memnode.cpp -+++ b/src/hotspot/share/opto/memnode.cpp -@@ -808,7 +808,7 @@ bool LoadNode::is_immutable_value(Node* adr) { - //----------------------------LoadNode::make----------------------------------- - // Polymorphic factory method: - Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - Compile* C = gvn.C; - - // sanity check the alias category against the created node type -@@ -859,6 +859,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - if (load->Opcode() == Op_LoadN) { - Node* ld = gvn.transform(load); - return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); -@@ -868,7 +869,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP - } - - LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - bool require_atomic = true; - LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); - if (unaligned) { -@@ -880,11 +881,12 @@ LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - return load; - } - - LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, -- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { -+ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { - bool require_atomic = true; - LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); - if (unaligned) { -@@ -896,6 +898,7 @@ LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr - if (unsafe) { - load->set_unsafe_access(); - } -+ load->set_barrier_data(barrier_data); - return load; - } - -diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp -index 7468abdbc..14a4a67c6 100644 ---- a/src/hotspot/share/opto/memnode.hpp -+++ b/src/hotspot/share/opto/memnode.hpp -@@ 
-227,7 +227,8 @@ public: - static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, - const TypePtr* at, const Type *rt, BasicType bt, - MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, -+ uint8_t barrier_data = 0); - - virtual uint hash() const; // Check the type - -@@ -408,7 +409,7 @@ public: - bool require_atomic_access() const { return _require_atomic_access; } - static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - #ifndef PRODUCT - virtual void dump_spec(outputStream *st) const { - LoadNode::dump_spec(st); -@@ -460,7 +461,7 @@ public: - bool require_atomic_access() const { return _require_atomic_access; } - static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, - const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, -- bool unaligned = false, bool mismatched = false, bool unsafe = false); -+ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); - #ifndef PRODUCT - virtual void dump_spec(outputStream *st) const { - LoadNode::dump_spec(st); --- -2.12.3 - diff --git a/Add-KAE-implementation.patch b/Add-KAE-implementation.patch index db5193ad1e4cc9000d4ab544cacccf47b1e0404b..f91f5ba9473c880f53e1e6ccdcce569b01e723f4 100644 --- a/Add-KAE-implementation.patch +++ b/Add-KAE-implementation.patch @@ -158,8 +158,8 @@ index 6672d26a5..c3e8ceb35 100644 +JDKOPT_DETECT_KAE JDKOPT_DETECT_INTREE_EC - JDKOPT_ENABLE_DISABLE_FAILURE_HANDLER - JDKOPT_ENABLE_DISABLE_GENERATE_CLASSLIST + LIB_TESTS_ENABLE_DISABLE_FAILURE_HANDLER + diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4 index 9d64b31bf..e20eafa60 100644 --- a/make/autoconf/jdk-options.m4 diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/Add-riscv64-support.patch similarity index 83% rename from 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch rename to Add-riscv64-support.patch index 13815b71215d789e75976982277c6f8b05762627..b06b0fe866fb0ae52a579323d4ef4e80effd0c9b 100644 --- a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +++ b/Add-riscv64-support.patch @@ -1,435 +1,163 @@ -From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 -From: misaka00251 -Date: Wed, 9 Aug 2023 02:24:23 +0800 -Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) - ---- - make/autoconf/build-aux/config.sub | 7 + - make/autoconf/hotspot.m4 | 3 +- - make/autoconf/libraries.m4 | 4 +- - make/autoconf/platform.m4 | 10 +- - make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- - src/hotspot/cpu/aarch64/aarch64.ad | 40 +- - .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- - .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + - .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + - src/hotspot/cpu/arm/arm.ad | 10 +- - src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- - src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- - src/hotspot/cpu/ppc/ppc.ad | 16 +- - .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + - src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + - src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 
+++ - .../cpu/riscv/assembler_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + - src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + - src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + - .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + - .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + - src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + - .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + - .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + - .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ - .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + - .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ - src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + - src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + - .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + - .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + - src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ - src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + - src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + - src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + - src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + - src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + - src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + - src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + - src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + - src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + - src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + - src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + - .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + - .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + - .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + - .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + - .../cardTableBarrierSetAssembler_riscv.cpp | 120 + - .../cardTableBarrierSetAssembler_riscv.hpp | 43 + - .../modRefBarrierSetAssembler_riscv.cpp | 54 + - .../modRefBarrierSetAssembler_riscv.hpp | 55 + - .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + - .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ - .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + - .../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + - .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + - src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + - src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + - src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + - src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + - src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ - src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + - src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + - src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + - .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + - .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + - src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + - .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ - .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ - .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + - src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + - src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + - src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + - src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + - src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + - .../cpu/riscv/register_definitions_riscv.cpp | 193 + - src/hotspot/cpu/riscv/register_riscv.cpp | 69 + - src/hotspot/cpu/riscv/register_riscv.hpp | 337 + - src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + 
- src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + - src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ - src/hotspot/cpu/riscv/riscv_b.ad | 605 + - src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ - src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ - src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ - src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + - src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + - .../templateInterpreterGenerator_riscv.cpp | 1841 +++ - src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ - src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + - src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + - .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + - .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + - src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + - src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + - src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + - src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + - src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + - src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + - src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- - src/hotspot/cpu/s390/s390.ad | 16 +- - src/hotspot/cpu/sparc/sparc.ad | 10 +- - src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- - src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + - src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + - src/hotspot/cpu/x86/x86.ad | 14 +- - src/hotspot/cpu/x86/x86_32.ad | 19 +- - src/hotspot/cpu/x86/x86_64.ad | 24 +- - src/hotspot/os/linux/os_linux.cpp | 11 +- - .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + - .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + - .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + - .../linux_riscv/globals_linux_riscv.hpp | 43 + - .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + - .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + - .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + - .../prefetch_linux_riscv.inline.hpp | 38 + - .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + - .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + - .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + - .../linux_riscv/vm_version_linux_riscv.cpp | 116 + - src/hotspot/share/adlc/archDesc.cpp | 5 + - src/hotspot/share/adlc/formssel.cpp | 2 + - src/hotspot/share/c1/c1_LIR.cpp | 113 +- - src/hotspot/share/c1/c1_LIR.hpp | 208 +- - src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- - src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- - src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- - src/hotspot/share/classfile/vmSymbols.cpp | 2 + - src/hotspot/share/classfile/vmSymbols.hpp | 1 + - .../gc/shenandoah/shenandoahArguments.cpp | 2 +- - .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- - src/hotspot/share/opto/c2compiler.cpp | 1 + - src/hotspot/share/opto/chaitin.cpp | 90 +- - src/hotspot/share/opto/chaitin.hpp | 32 +- - src/hotspot/share/opto/intrinsicnode.hpp | 5 +- - src/hotspot/share/opto/library_call.cpp | 13 +- - src/hotspot/share/opto/machnode.cpp | 2 +- - src/hotspot/share/opto/machnode.hpp | 4 + - src/hotspot/share/opto/matcher.cpp | 41 +- - src/hotspot/share/opto/matcher.hpp | 6 +- - src/hotspot/share/opto/node.cpp | 21 + - src/hotspot/share/opto/node.hpp | 5 + - src/hotspot/share/opto/opcodes.cpp | 4 +- - src/hotspot/share/opto/opcodes.hpp | 2 + - src/hotspot/share/opto/phase.cpp | 2 + - src/hotspot/share/opto/phase.hpp | 1 + - src/hotspot/share/opto/postaloc.cpp | 53 +- - src/hotspot/share/opto/regmask.cpp | 46 +- - src/hotspot/share/opto/regmask.hpp | 10 +- - src/hotspot/share/opto/superword.cpp | 7 +- - src/hotspot/share/opto/type.cpp | 14 +- - 
src/hotspot/share/opto/type.hpp | 12 +- - src/hotspot/share/opto/vectornode.cpp | 4 +- - .../share/runtime/abstract_vm_version.cpp | 12 +- - src/hotspot/share/runtime/thread.hpp | 2 +- - src/hotspot/share/runtime/thread.inline.hpp | 2 +- - src/hotspot/share/utilities/debug.cpp | 1 + - src/hotspot/share/utilities/macros.hpp | 26 + - .../share/classes/java/lang/StringLatin1.java | 5 + - .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- - .../linux/native/libsaproc/libproc.h | 2 + - .../linux/native/libsaproc/ps_proc.c | 4 + - .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + - .../debugger/MachineDescriptionRISCV64.java | 40 + - .../debugger/linux/LinuxCDebugger.java | 11 +- - .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + - .../riscv64/LinuxRISCV64ThreadContext.java | 48 + - .../debugger/proc/ProcDebuggerLocal.java | 6 + - .../proc/riscv64/ProcRISCV64Thread.java | 88 + - .../riscv64/ProcRISCV64ThreadContext.java | 48 + - .../riscv64/ProcRISCV64ThreadFactory.java | 46 + - .../remote/riscv64/RemoteRISCV64Thread.java | 55 + - .../riscv64/RemoteRISCV64ThreadContext.java | 48 + - .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + - .../riscv64/RISCV64ThreadContext.java | 172 + - .../sun/jvm/hotspot/runtime/Threads.java | 3 + - .../LinuxRISCV64JavaThreadPDAccess.java | 132 + - .../riscv64/RISCV64CurrentFrameGuess.java | 223 + - .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + - .../riscv64/RISCV64JavaCallWrapper.java | 58 + - .../runtime/riscv64/RISCV64RegisterMap.java | 53 + - .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- - src/utils/hsdis/hsdis.c | 6 +- - test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- - ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + - ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + - .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + - .../testcases/GenericTestCaseForOtherCPU.java | 10 +- - ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + - .../string/TestStringLatin1IndexOfChar.java | 153 + - .../loopopts/superword/ProdRed_Double.java | 2 +- - .../loopopts/superword/ProdRed_Float.java | 2 +- - .../loopopts/superword/ProdRed_Int.java | 2 +- - .../loopopts/superword/ReductionPerf.java | 2 +- - .../superword/SumRedAbsNeg_Double.java | 2 +- - .../superword/SumRedAbsNeg_Float.java | 2 +- - .../loopopts/superword/SumRedSqrt_Double.java | 2 +- - .../loopopts/superword/SumRed_Double.java | 2 +- - .../loopopts/superword/SumRed_Float.java | 2 +- - .../loopopts/superword/SumRed_Int.java | 2 +- - .../argumentcorruption/CheckLongArgs.java | 2 +- - .../criticalnatives/lookup/LookUp.java | 2 +- - .../sha/predicate/IntrinsicPredicates.java | 9 +- - .../NMT/CheckForProperDetailStackTrace.java | 3 +- - .../ReservedStack/ReservedStackTest.java | 3 +- - test/hotspot/jtreg/test_env.sh | 5 + - ...stMutuallyExclusivePlatformPredicates.java | 3 +- - .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- - .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- - test/lib/jdk/test/lib/Platform.java | 5 + - .../bench/java/lang/StringIndexOfChar.java | 221 + - 218 files changed, 57653 insertions(+), 221 deletions(-) - create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp - create mode 100644 
src/hotspot/cpu/riscv/c1_Defs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad - create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp - create 
mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/riscv.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad - create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad - create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp - create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp - create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp - create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java - create mode 100644 
src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java - create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java - create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java - create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java - +diff --git a/.github/workflows/build-cross-compile.yml b/.github/workflows/build-cross-compile.yml +index 385b097b9f..b1c333f711 100644 +--- a/.github/workflows/build-cross-compile.yml ++++ b/.github/workflows/build-cross-compile.yml +@@ -54,28 +54,39 @@ jobs: + - arm + - s390x + - ppc64le ++ - riscv64 + include: + - target-cpu: aarch64 + gnu-arch: aarch64 + debian-arch: arm64 + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: arm + gnu-arch: arm + debian-arch: armhf + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + gnu-abi: eabihf + - target-cpu: s390x + gnu-arch: s390x + debian-arch: s390x + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false + - target-cpu: ppc64le + gnu-arch: powerpc64le + debian-arch: ppc64el + debian-repository: https://httpredir.debian.org/debian/ + debian-version: bullseye ++ tolerate-sysroot-errors: false ++ - target-cpu: riscv64 ++ gnu-arch: riscv64 ++ debian-arch: riscv64 ++ debian-repository: https://httpredir.debian.org/debian/ ++ debian-version: sid ++ tolerate-sysroot-errors: true + + steps: + - name: 'Checkout the JDK source' +@@ -113,6 +124,7 @@ jobs: + if: steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Create sysroot' ++ id: create-sysroot + run: > + sudo debootstrap + --arch=${{ matrix.debian-arch }} +@@ -123,6 +135,7 @@ jobs: + ${{ matrix.debian-version }} + sysroot + ${{ matrix.debian-repository }} ++ continue-on-error: ${{ matrix.tolerate-sysroot-errors }} + if: 
steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Prepare sysroot' +@@ -134,7 +147,12 @@ jobs: + rm -rf sysroot/usr/{sbin,bin,share} + rm -rf sysroot/usr/lib/{apt,gcc,udev,systemd} + rm -rf sysroot/usr/libexec/gcc +- if: steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ if: steps.create-sysroot.outcome == 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' ++ ++ - name: 'Remove broken sysroot' ++ run: | ++ sudo rm -rf sysroot/ ++ if: steps.create-sysroot.outcome != 'success' && steps.get-cached-sysroot.outputs.cache-hit != 'true' + + - name: 'Configure' + run: > +@@ -153,6 +171,7 @@ jobs: + echo "Dumping config.log:" && + cat config.log && + exit 1) ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' + + - name: 'Build' + id: build +@@ -160,3 +179,4 @@ jobs: + with: + make-target: 'hotspot ${{ inputs.make-arguments }}' + platform: linux-${{ matrix.target-cpu }} ++ if: steps.create-sysroot.outcome == 'success' || steps.get-cached-sysroot.outputs.cache-hit == 'true' +diff --git a/.jcheck/conf b/.jcheck/conf +index be7ad6d26f..e35eb77696 100644 +--- a/.jcheck/conf ++++ b/.jcheck/conf +@@ -1,5 +1,5 @@ + [general] +-project=jdk-updates ++project=riscv-port + jbs=JDK + version=11.0.24 + diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub -index 3c280ac7c..eda408e01 100644 +index 3c280ac7c0..6c66c221e0 100644 --- a/make/autoconf/build-aux/config.sub +++ b/make/autoconf/build-aux/config.sub -@@ -48,6 +48,13 @@ if ! echo $* | grep '^aarch64-' >/dev/null ; then +@@ -1,6 +1,6 @@ + #!/bin/sh + # +-# Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -40,6 +40,13 @@ if echo $* | grep pc-msys >/dev/null ; then exit fi +# Canonicalize for riscv which autoconf-config.sub doesn't handle -+if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then ++if echo $* | grep '^riscv\(32\|64\)-linux' >/dev/null ; then + result=`echo $@ | sed 's/linux/unknown-linux/'` + echo $result + exit +fi + - while test $# -gt 0 ; do - case $1 in - -- ) # Stop option processing + # First, filter out everything that doesn't begin with "aarch64-" + if ! echo $* | grep '^aarch64-' >/dev/null ; then + . 
$DIR/autoconf-config.sub "$@" diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2..01ef26c10 100644 +index 9bb34363e5..f84e8f84c6 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 -@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -370,7 +370,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], AC_MSG_CHECKING([if shenandoah can be built]) if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ - test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then + test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ -+ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64"; then AC_MSG_RESULT([yes]) else DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" -diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 -index 16e906bdc..c01fdbcce 100644 ---- a/make/autoconf/libraries.m4 -+++ b/make/autoconf/libraries.m4 -@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - GLOBAL_LIBS="" - fi - -- BASIC_JDKLIB_LIBS="" -+ BASIC_JDKLIB_LIBS="-latomic" - if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then - BASIC_JDKLIB_LIBS="-ljava -ljvm" - fi -@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], - wsock32.lib winmm.lib version.lib psapi.lib" - fi - -+ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" -+ - JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" - JDKEXE_LIBS="" - JVM_LIBS="$BASIC_JVM_LIBS" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5f..48d615992 100644 +index 5d1d9efa39..565ca18e20 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 -@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], - VAR_CPU_BITS=64 - VAR_CPU_ENDIAN=little - ;; -+ riscv32) -+ VAR_CPU=riscv32 -+ VAR_CPU_ARCH=riscv -+ VAR_CPU_BITS=32 -+ VAR_CPU_ENDIAN=little -+ ;; - riscv64) - VAR_CPU=riscv64 - VAR_CPU_ARCH=riscv -@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], - HOTSPOT_$1_CPU_DEFINE=S390 - elif test "x$OPENJDK_$1_CPU" = xs390x; then - HOTSPOT_$1_CPU_DEFINE=S390 -+ elif test "x$OPENJDK_$1_CPU" = xriscv32; then -+ HOTSPOT_$1_CPU_DEFINE=RISCV32 - elif test "x$OPENJDK_$1_CPU" = xriscv64; then -- HOTSPOT_$1_CPU_DEFINE=RISCV +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2011, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + # + # This code is free software; you can redistribute it and/or modify it +@@ -554,6 +554,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xriscv64; then + HOTSPOT_$1_CPU_DEFINE=RISCV64 - elif test "x$OPENJDK_$1_CPU" != x; then - HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) - fi + + # The cpu defines below are for zero, we don't support them directly. + elif test "x$OPENJDK_$1_CPU" = xsparc; then diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index c5a3ac572..9de6f663c 100644 +index c5a3ac5724..51137b99db 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -1,5 +1,5 @@ @@ -439,20 +167,12 @@ index c5a3ac572..9de6f663c 100644 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it -@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ ))) -+ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) -+ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ -+ ))) -+ endif -+ + ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ -+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ + ))) + endif @@ -460,303 +180,14 @@ index c5a3ac572..9de6f663c 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 1e4ee33a9..ac5d56f0f 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return size; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg. 
- const uint Matcher::vector_ideal_reg(int len) { - switch(len) { -@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, - ins_pipe(pipe_class_memory); - %} - --instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, - iRegINoSp tmp3, rFlagsReg cr) - %{ - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); - effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, - TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} - - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, - ins_pipe(pipe_class_memory); - %} - -+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, -+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, rFlagsReg cr) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); -+ -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} -+ -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register); -+ %} -+ ins_pipe(pipe_class_memory); -+%} -+ - instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, - iRegI_R0 result, rFlagsReg cr) - %{ -diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -index fdd2c0ca3..1a35be210 100644 ---- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp -@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); - - Assembler::Condition acond, ncond; - switch (condition) { -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 5753cc9a6..21c6fdf19 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, - BIND(DONE); - } - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3) -+{ -+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; -+ Register cnt1_neg = cnt1; -+ Register ch1 = rscratch1; -+ Register result_tmp = rscratch2; -+ -+ cbz(cnt1, NOMATCH); -+ -+ cmp(cnt1, (u1)8); -+ br(LT, DO1_SHORT); -+ -+ orr(ch, ch, ch, LSL, 8); -+ orr(ch, ch, ch, LSL, 16); -+ orr(ch, ch, ch, LSL, 32); -+ 
-+ sub(cnt1, cnt1, 8); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ -+ mov(tmp3, 0x0101010101010101); -+ -+ BIND(CH1_LOOP); -+ ldr(ch1, Address(str1, cnt1_neg)); -+ eor(ch1, ch, ch1); -+ sub(tmp1, ch1, tmp3); -+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); -+ bics(tmp1, tmp1, tmp2); -+ br(NE, HAS_ZERO); -+ adds(cnt1_neg, cnt1_neg, 8); -+ br(LT, CH1_LOOP); -+ -+ cmp(cnt1_neg, (u1)8); -+ mov(cnt1_neg, 0); -+ br(LT, CH1_LOOP); -+ b(NOMATCH); -+ -+ BIND(HAS_ZERO); -+ rev(tmp1, tmp1); -+ clz(tmp1, tmp1); -+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); -+ b(MATCH); -+ -+ BIND(DO1_SHORT); -+ mov(result_tmp, cnt1); -+ lea(str1, Address(str1, cnt1)); -+ sub(cnt1_neg, zr, cnt1); -+ BIND(DO1_LOOP); -+ ldrb(ch1, Address(str1, cnt1_neg)); -+ cmp(ch, ch1); -+ br(EQ, MATCH); -+ adds(cnt1_neg, cnt1_neg, 1); -+ br(LT, DO1_LOOP); -+ BIND(NOMATCH); -+ mov(result, -1); -+ b(DONE); -+ BIND(MATCH); -+ add(result, result_tmp, cnt1_neg); -+ BIND(DONE); -+} -+ - // Compare strings. - void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7e23c16a4..c3d472a9a 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -1260,6 +1260,9 @@ public: - void string_indexof_char(Register str1, Register cnt1, - Register ch, Register result, - Register tmp1, Register tmp2, Register tmp3); -+ void stringL_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, Register tmp3); - void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, - FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, -diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 51f2d9ce7..71f83521e 100644 ---- a/src/hotspot/cpu/arm/arm.ad -+++ b/src/hotspot/cpu/arm/arm.ad -@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. 
- } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { - return MaxVectorSize; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -index f0a7229aa..2d06d3d58 100644 ---- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp -@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); -+ - AsmCondition acond = al; - AsmCondition ncond = nv; - if (opr1 != opr2) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d..d081116be 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { - } - - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); -+ - if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { - load_to_reg(this, opr1, result); // Condition doesn't matter. - return; -diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index ebbe80a26..df66a46dc 100644 ---- a/src/hotspot/cpu/ppc/ppc.ad -+++ b/src/hotspot/cpu/ppc/ppc.ad -@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. 
- } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // PPC implementation uses VSX load/store instructions (if - // SuperwordUseVSX) which support 4 byte but not arbitrary alignment - const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp new file mode 100644 -index 000000000..5661b7425 +index 0000000000..31c63abe71 --- /dev/null +++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp -@@ -0,0 +1,185 @@ +@@ -0,0 +1,177 @@ +/* -+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -784,13 +215,13 @@ index 000000000..5661b7425 +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/constMethod.hpp" ++#include "oops/klass.inline.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/align.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + -+ +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { @@ -880,7 +311,6 @@ index 000000000..5661b7425 + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state -+ + assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * @@ -894,14 +324,6 @@ index 000000000..5661b7425 + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp -+ // -+ // The interpreted method entry on riscv aligns SP to 16 bytes -+ // before generating the fixed part of the activation frame. So there -+ // may be a gap between the locals block and the saved sender SP. For -+ // an interpreted caller we need to recreate this gap and exactly -+ // align the incoming parameters with the caller's temporary -+ // expression stack. For other types of caller frame it doesn't -+ // matter. + intptr_t* locals = NULL; + if (caller->is_interpreted_frame()) { + locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; @@ -935,6 +357,7 @@ index 000000000..5661b7425 + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } ++ + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = @@ -942,14 +365,14 @@ index 000000000..5661b7425 +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp new file mode 100644 -index 000000000..40ecf1a6c +index 0000000000..a83d43a8f1 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp @@ -0,0 +1,365 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -969,6 +392,7 @@ index 000000000..40ecf1a6c + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + +#include @@ -983,8 +407,6 @@ index 000000000..40ecf1a6c +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" + -+#define __ _masm. -+ +int AbstractAssembler::code_fill_byte() { + return 0; +} @@ -999,7 +421,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++void Assembler::addw(Register Rd, Register Rn, int64_t increment, Register temp) { + if (is_imm_in_range(increment, 12, 0)) { + addiw(Rd, Rn, increment); + } else { @@ -1019,7 +441,7 @@ index 000000000..40ecf1a6c + } +} + -+void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++void Assembler::subw(Register Rd, Register Rn, int64_t decrement, Register temp) { + if (is_imm_in_range(-decrement, 12, 0)) { + addiw(Rd, Rn, -decrement); + } else { @@ -1033,11 +455,12 @@ index 000000000..40ecf1a6c + add_uw(Rd, Rs, zr); +} + -+void Assembler::li(Register Rd, int64_t imm) { ++void Assembler::_li(Register Rd, int64_t imm) { + // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff + int shift = 12; + int64_t upper = imm, lower = imm; -+ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ // Split imm to a lower 12-bit sign-extended part and the remainder, ++ // because addi will sign-extend the lower imm. + lower = ((int32_t)imm << 20) >> 20; + upper -= lower; + @@ -1051,8 +474,7 @@ index 000000000..40ecf1a6c + if (lower != 0) { + addi(Rd, Rd, lower); + } -+ } -+ else { ++ } else { + // 32-bit integer + Register hi_Rd = zr; + if (upper != 0) { @@ -1066,30 +488,30 @@ index 000000000..40ecf1a6c +} + +void Assembler::li64(Register Rd, int64_t imm) { -+ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), -+ // upper = imm[63:32] + 1. -+ int64_t lower = imm & 0xffffffff; -+ lower -= ((lower << 44) >> 44); -+ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; -+ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; -+ -+ // Load upper 32 bits -+ int64_t up = upper, lo = upper; -+ lo = (lo << 52) >> 52; -+ up -= lo; -+ up = (int32_t)up; -+ lui(Rd, up); -+ addi(Rd, Rd, lo); -+ -+ // Load the rest 32 bits. -+ slli(Rd, Rd, 12); -+ addi(Rd, Rd, (int32_t)lower >> 20); -+ slli(Rd, Rd, 12); -+ lower = ((int32_t)imm << 12) >> 20; -+ addi(Rd, Rd, lower); -+ slli(Rd, Rd, 8); -+ lower = imm & 0xff; -+ addi(Rd, Rd, lower); ++ // Load upper 32 bits. upper = imm[63:32], but if imm[31] == 1 or ++ // (imm[31:28] == 0x7ff && imm[19] == 1), upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); ++ ++ // Load the rest 32 bits. 
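Both _li above and the tail of li64 below rely on the same splitting idea: because addi sign-extends its 12-bit operand, the low chunk is taken as a signed value and the part handed to lui absorbs the resulting borrow. A small standalone sketch of the 32-bit split; the helper name and the worked constant are illustrative, and the shift expression deliberately mirrors the one used in _li:

    #include <cstdint>

    // Split a 32-bit immediate so that upper + sign_extend_12(lower) == imm,
    // with upper a multiple of 0x1000 suitable for lui and lower a signed
    // 12-bit value suitable for addi.
    static void split_simm32(int32_t imm, int32_t &upper, int32_t &lower) {
      lower = (imm << 20) >> 20;   // sign-extended low 12 bits
      upper = imm - lower;         // a negative lower is folded in as a borrow here
    }

    // Example: imm = 0x12345FFF gives lower = -1 and upper = 0x12346000,
    // so lui materializes 0x12346000 and addi then subtracts 1.

For the remaining low 32 bits of a full 64-bit constant, li64 below shifts the value into place twelve, twelve and eight bits at a time with slli/addi pairs.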
++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); +} + +void Assembler::li32(Register Rd, int32_t imm) { @@ -1162,15 +584,16 @@ index 000000000..40ecf1a6c + +#define INSN(NAME, REGISTER) \ + void Assembler::NAME(const Address &adr, Register temp) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(adr.target(), temp); \ + break; \ + } \ + case Address::base_plus_offset: { \ -+ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ -+ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ int32_t offset = 0; \ ++ baseOffset(temp, adr, offset); \ ++ jalr(REGISTER, temp, offset); \ + break; \ + } \ + default: \ @@ -1245,7 +668,7 @@ index 000000000..40ecf1a6c + addi(Rd, Rd, (imm64 >> 6) & 0x7ff); + slli(Rd, Rd, 6); + -+ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. ++ // This offset will be used by following jalr/ld. + offset = imm64 & 0x3f; +} + @@ -1313,14 +736,14 @@ index 000000000..40ecf1a6c +} diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp new file mode 100644 -index 000000000..d4da30ed6 +index 0000000000..9e7d271860 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp -@@ -0,0 +1,2004 @@ +@@ -0,0 +1,3057 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -1348,6 +771,7 @@ index 000000000..d4da30ed6 + +#include "asm/register.hpp" +#include "assembler_riscv.inline.hpp" ++#include "metaprogramming/enableIf.hpp" + +#define XLEN 64 + @@ -1359,10 +783,10 @@ index 000000000..d4da30ed6 +class Argument { + public: + enum { -+ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) -+ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) + -+ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (j_rarg0, j_rarg1, ...) + n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) + }; +}; @@ -1386,7 +810,21 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(FloatRegister, c_farg6, f16); +REGISTER_DECLARATION(FloatRegister, c_farg7, f17); + -+// java function register(caller-save registers) ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. 
A suitable jni method is non-static and a ++// small number of arguments. ++// ++// |------------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |------------------------------------------------------------------------| ++// | x10 x11 x12 x13 x14 x15 x16 x17 | ++// |------------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |------------------------------------------------------------------------| ++ +REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); +REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); +REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); @@ -1396,6 +834,8 @@ index 000000000..d4da30ed6 +REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); +REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); + ++// Java floating args are passed as per C ++ +REGISTER_DECLARATION(FloatRegister, j_farg0, f10); +REGISTER_DECLARATION(FloatRegister, j_farg1, f11); +REGISTER_DECLARATION(FloatRegister, j_farg2, f12); @@ -1412,6 +852,9 @@ index 000000000..d4da30ed6 +// thread pointer +REGISTER_DECLARATION(Register, tp, x4); + ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ +// volatile (caller-save) registers + +// current method -- must be in a call-clobbered register @@ -1434,9 +877,6 @@ index 000000000..d4da30ed6 +// locals on stack +REGISTER_DECLARATION(Register, xlocals, x24); + -+/* If you use x4(tp) as java thread pointer according to the instruction manual, -+ * it overlaps with the register used by c++ thread. -+ */ +// java thread pointer +REGISTER_DECLARATION(Register, xthread, x23); +// bytecode pointer @@ -1446,13 +886,13 @@ index 000000000..d4da30ed6 +// Java stack pointer +REGISTER_DECLARATION(Register, esp, x20); + -+// tempory register(caller-save registers) ++// temporary register(caller-save registers) +REGISTER_DECLARATION(Register, t0, x5); +REGISTER_DECLARATION(Register, t1, x6); +REGISTER_DECLARATION(Register, t2, x7); + +const Register g_INTArgReg[Argument::n_int_register_parameters_c] = { -+ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7 +}; + +const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = { @@ -1469,6 +909,7 @@ index 000000000..d4da30ed6 + + private: + Register _base; ++ Register _index; + int64_t _offset; + enum mode _mode; + @@ -1481,46 +922,49 @@ index 000000000..d4da30ed6 + + public: + Address() -+ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { } ++ : _base(noreg), _index(noreg), _offset(0), _mode(no_mode), _target(NULL) { } + Address(Register r) -+ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, int o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned int o) -+ : _base(r), 
_offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } + Address(Register r, unsigned long long o) -+ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(NULL) { } +#ifdef ASSERT + Address(Register r, ByteSize disp) -+ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { } ++ : _base(r), _index(noreg), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(0) { } +#endif + Address(address target, RelocationHolder const& rspec) + : _base(noreg), ++ _index(noreg), + _offset(0), + _mode(literal), + _rspec(rspec), -+ _target(target) { } ++ _target(target) { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + + const Register base() const { -+ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode"); ++ guarantee((_mode == base_plus_offset | _mode == pcrel | _mode == literal), "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } -+ ++ Register index() const { ++ return _index; ++ } + mode getMode() const { + return _mode; + } + -+ bool uses(Register reg) const { return _base == reg;} ++ bool uses(Register reg) const { return _base == reg; } + const address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + ~Address() { @@ -1584,34 +1028,40 @@ index 000000000..d4da30ed6 + rdy = 0b111, // in instruction's rm field, selects dynamic rounding mode.In Rounding Mode register, Invalid. 
+ }; + -+ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ assert_different_registers(noreg, temp, base); -+ int64_t upper = offset, lower = offset; -+ -+ int8_t shift = 64 - expect_offbits; -+ lower = (offset << shift) >> shift; -+ upper -= lower; -+ -+ li(temp, upper); -+ add(temp, temp, base); -+ return Address(temp, lower); ++ void baseOffset32(Register Rd, const Address &adr, int32_t &offset) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ guarantee(Rd != adr.base(), "should use different registers!"); ++ if (is_offset_in_range(adr.offset(), 32)) { ++ int32_t imm = adr.offset(); ++ int32_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ lui(Rd, upper); ++ offset = lower; ++ } else { ++ movptr_with_offset(Rd, (address)(uintptr_t)adr.offset(), offset); ++ } ++ add(Rd, Rd, adr.base()); + } + -+ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) { -+ if (is_offset_in_range(offset, expect_offbits)) { -+ return Address(base, offset); ++ void baseOffset(Register Rd, const Address &adr, int32_t &offset) { ++ if (is_offset_in_range(adr.offset(), 12)) { ++ assert(Rd != noreg, "Rd must not be empty register!"); ++ addi(Rd, adr.base(), adr.offset()); ++ offset = 0; ++ } else { ++ baseOffset32(Rd, adr, offset); + } -+ return form_address_complex(base, offset, expect_offbits, temp); + } + -+ void li(Register Rd, int64_t imm); // optimized load immediate ++ void _li(Register Rd, int64_t imm); // optimized load immediate + void li32(Register Rd, int32_t imm); + void li64(Register Rd, int64_t imm); + void movptr(Register Rd, address addr); + void movptr_with_offset(Register Rd, address addr, int32_t &offset); + void movptr(Register Rd, uintptr_t imm64); + void j(const address &dest, Register temp = t0); -+ void j(const Address &adr, Register temp = t0) ; ++ void j(const Address &adr, Register temp = t0); + void j(Label &l, Register temp = t0); + void jal(Label &l, Register temp = t0); + void jal(const address &dest, Register temp = t0); @@ -1633,7 +1083,7 @@ index 000000000..d4da30ed6 + static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) { + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ uint32_t mask = checked_cast(right_n_bits(nbits)); ++ uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; @@ -1650,8 +1100,8 @@ index 000000000..d4da30ed6 + assert_cond(a != NULL); + assert_cond(msb >= lsb && msb <= 31); + unsigned nbits = msb - lsb + 1; -+ guarantee(val < (1ULL << nbits), "Field too big for insn"); -+ unsigned mask = checked_cast(right_n_bits(nbits)); ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; @@ -1680,11 +1130,11 @@ index 000000000..d4da30ed6 + emit_int32((jint)insn); + } + -+ void halt() { ++ void _halt() { + emit_int32(0); + } + -+// Rigster Instruction ++// Register Instruction +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ + unsigned insn = 0; \ @@ -1697,18 +1147,18 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(add, 0b0110011, 0b000, 0b0000000); -+ INSN(sub, 0b0110011, 0b000, 0b0100000); -+ INSN(andr, 0b0110011, 0b111, 0b0000000); -+ INSN(orr, 0b0110011, 0b110, 0b0000000); -+ INSN(xorr, 0b0110011, 0b100, 0b0000000); ++ INSN(_add, 0b0110011, 0b000, 0b0000000); ++ 
INSN(_sub, 0b0110011, 0b000, 0b0100000); ++ INSN(_andr, 0b0110011, 0b111, 0b0000000); ++ INSN(_orr, 0b0110011, 0b110, 0b0000000); ++ INSN(_xorr, 0b0110011, 0b100, 0b0000000); + INSN(sll, 0b0110011, 0b001, 0b0000000); + INSN(sra, 0b0110011, 0b101, 0b0100000); + INSN(srl, 0b0110011, 0b101, 0b0000000); + INSN(slt, 0b0110011, 0b010, 0b0000000); + INSN(sltu, 0b0110011, 0b011, 0b0000000); -+ INSN(addw, 0b0111011, 0b000, 0b0000000); -+ INSN(subw, 0b0111011, 0b000, 0b0100000); ++ INSN(_addw, 0b0111011, 0b000, 0b0000000); ++ INSN(_subw, 0b0111011, 0b000, 0b0100000); + INSN(sllw, 0b0111011, 0b001, 0b0000000); + INSN(sraw, 0b0111011, 0b101, 0b0100000); + INSN(srlw, 0b0111011, 0b101, 0b0000000); @@ -1726,22 +1176,20 @@ index 000000000..d4da30ed6 + INSN(remw, 0b0111011, 0b110, 0b0000001); + INSN(remuw, 0b0111011, 0b111, 0b0000001); + -+ // Vector Configuration Instruction -+ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); -+ +#undef INSN + +#define INSN_ENTRY_RELOC(result_type, header) \ + result_type header { \ ++ InstructionMark im(this); \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ -+ code_section()->relocate(pc(), InternalAddress(dest).rspec()); ++ code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); + + // Load/store register (all modes) +#define INSN(NAME, op, funct3) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + int32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1749,7 +1197,19 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(_lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ INSN(_ld, 0b0000011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, address dest) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1766,7 +1226,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest); \ + } \ + void NAME(Register Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target()); \ @@ -1776,7 +1236,14 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? 
temp : Rd)); \ ++ int32_t offset = 0; \ ++ if (Rd == adr.base()) { \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ ++ } else { \ ++ baseOffset32(Rd, adr, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ + } \ + break; \ + } \ @@ -1788,20 +1255,20 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, &Assembler::NAME); \ + } + -+ INSN(lb, 0b0000011, 0b000); -+ INSN(lbu, 0b0000011, 0b100); -+ INSN(ld, 0b0000011, 0b011); -+ INSN(lh, 0b0000011, 0b001); -+ INSN(lhu, 0b0000011, 0b101); -+ INSN(lw, 0b0000011, 0b010); -+ INSN(lwu, 0b0000011, 0b110); ++ INSN(lb); ++ INSN(lbu); ++ INSN(lh); ++ INSN(lhu); ++ INSN(lw); ++ INSN(lwu); ++ INSN(ld); + +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + patch((address)&insn, 6, 0, op); \ + patch((address)&insn, 14, 12, funct3); \ @@ -1809,7 +1276,14 @@ index 000000000..d4da30ed6 + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 31, 20, val); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(_fld, 0b0000111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t distance = (dest - pc()); \ @@ -1826,7 +1300,7 @@ index 000000000..d4da30ed6 + NAME(Rd, dest, temp); \ + } \ + void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rd, adr.target(), temp); \ @@ -1836,7 +1310,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rd, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rd, temp, offset); \ + } \ + break; \ + } \ @@ -1845,14 +1321,14 @@ index 000000000..d4da30ed6 + } \ + } + -+ INSN(flw, 0b0000111, 0b010); -+ INSN(fld, 0b0000111, 0b011); ++ INSN(flw); ++ INSN(fld); +#undef INSN + +#define INSN(NAME, op, funct3) \ + void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0x1fff; \ + uint32_t val11 = (val >> 11) & 0x1; \ + uint32_t val12 = (val >> 12) & 0x1; \ @@ -1867,7 +1343,18 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 25, high); \ + patch((address)&insn, 31, val12); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_beq, 0b1100011, 0b000); ++ INSN(_bne, 0b1100011, 0b001); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rs1, Register Rs2, const address dest) { \ + assert_cond(dest != NULL); \ + int64_t offset = (dest - pc()); \ @@ -1878,12 +1365,12 @@ index 000000000..d4da30ed6 + NAME(Rs1, Rs2, dest); \ + } + -+ INSN(beq, 0b1100011, 0b000); -+ INSN(bge, 0b1100011, 0b101); -+ INSN(bgeu, 0b1100011, 0b111); -+ INSN(blt, 0b1100011, 0b100); -+ INSN(bltu, 0b1100011, 0b110); -+ INSN(bne, 0b1100011, 0b001); ++ INSN(beq); ++ INSN(bne); ++ INSN(bge); ++ INSN(bgeu); ++ INSN(blt); ++ INSN(bltu); + +#undef INSN + @@ -1903,8 +1390,8 @@ index 000000000..d4da30ed6 + +#define 
INSN(NAME, REGISTER, op, funct3) \ + void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + uint32_t val = offset & 0xfff; \ + uint32_t low = val & 0x1f; \ + uint32_t high = (val >> 5) & 0x7f; \ @@ -1916,16 +1403,27 @@ index 000000000..d4da30ed6 + patch((address)&insn, 31, 25, high); \ + emit(insn); \ + } \ ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(_sw, Register, 0b0100011, 0b010); ++ INSN(_sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(_fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ + INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ + NAME(Rs, dest, temp); \ + } + -+ INSN(sb, Register, 0b0100011, 0b000); -+ INSN(sh, Register, 0b0100011, 0b001); -+ INSN(sw, Register, 0b0100011, 0b010); -+ INSN(sd, Register, 0b0100011, 0b011); -+ INSN(fsw, FloatRegister, 0b0100111, 0b010); -+ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ INSN(sb, Register); ++ INSN(sh, Register); ++ INSN(sw, Register); ++ INSN(sd, Register); ++ INSN(fsw, FloatRegister); ++ INSN(fsd, FloatRegister); + +#undef INSN + @@ -1944,7 +1442,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(Register Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + assert_different_registers(Rs, temp); \ + code_section()->relocate(pc(), adr.rspec()); \ @@ -1955,8 +1453,10 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ ++ int32_t offset= 0; \ + assert_different_registers(Rs, temp); \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -1986,7 +1486,7 @@ index 000000000..d4da30ed6 + } \ + } \ + void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ -+ switch(adr.getMode()) { \ ++ switch (adr.getMode()) { \ + case Address::literal: { \ + code_section()->relocate(pc(), adr.rspec()); \ + NAME(Rs, adr.target(), temp); \ @@ -1996,7 +1496,9 @@ index 000000000..d4da30ed6 + if (is_offset_in_range(adr.offset(), 12)) { \ + NAME(Rs, adr.base(), adr.offset()); \ + } else { \ -+ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ int32_t offset = 0; \ ++ baseOffset32(temp, adr, offset); \ ++ NAME(Rs, temp, offset); \ + } \ + break; \ + } \ @@ -2050,8 +1552,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op) \ + void NAME(Register Rd, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ @@ -2059,7 +1561,13 @@ index 000000000..d4da30ed6 + patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ + patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ + emit(insn); \ -+ } \ ++ } ++ ++ INSN(_jal, 0b1101111); ++ ++#undef INSN ++ ++#define INSN(NAME) \ + void NAME(Register Rd, const address dest, Register temp = t0) { \ + assert_cond(dest != NULL); \ + int64_t offset = dest - pc(); \ @@ -2077,7 +1585,7 @@ index 000000000..d4da30ed6 + wrap_label(Rd, L, temp, &Assembler::NAME); 
\ + } + -+ INSN(jal, 0b1101111); ++ INSN(jal); + +#undef INSN + @@ -2085,8 +1593,8 @@ index 000000000..d4da30ed6 + +#define INSN(NAME, op, funct) \ + void NAME(Register Rd, Register Rs, const int32_t offset) { \ -+ unsigned insn = 0; \ + guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ + patch_reg((address)&insn, 7, Rd); \ + patch((address)&insn, 14, 12, funct); \ @@ -2096,7 +1604,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(jalr, 0b1100111, 0b000); ++ INSN(_jalr, 0b1100111, 0b000); + +#undef INSN + @@ -2131,7 +1639,8 @@ index 000000000..d4da30ed6 + } + + INSN(ecall, 0b1110011, 0b000, 0b000000000000); -+ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++ INSN(_ebreak, 0b1110011, 0b000, 0b000000000001); ++ +#undef INSN + +enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; @@ -2239,12 +1748,12 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(addi, 0b0010011, 0b000); -+ INSN(slti, 0b0010011, 0b010); -+ INSN(addiw, 0b0011011, 0b000); -+ INSN(and_imm12, 0b0010011, 0b111); -+ INSN(ori, 0b0010011, 0b110); -+ INSN(xori, 0b0010011, 0b100); ++ INSN(_addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(_addiw, 0b0011011, 0b000); ++ INSN(_and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); + +#undef INSN + @@ -2278,9 +1787,9 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(slli, 0b0010011, 0b001, 0b000000); -+ INSN(srai, 0b0010011, 0b101, 0b010000); -+ INSN(srli, 0b0010011, 0b101, 0b000000); ++ INSN(_slli, 0b0010011, 0b001, 0b000000); ++ INSN(_srai, 0b0010011, 0b101, 0b010000); ++ INSN(_srli, 0b0010011, 0b101, 0b000000); + +#undef INSN + @@ -2316,7 +1825,7 @@ index 000000000..d4da30ed6 + emit(insn); \ + } + -+ INSN(lui, 0b0110111); ++ INSN(_lui, 0b0110111); + INSN(auipc, 0b0010111); + +#undef INSN @@ -2592,6 +2101,23 @@ index 000000000..d4da30ed6 + +#undef patch_vtype + ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ // Vector Configuration Instruction ++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000); ++ ++#undef INSN ++ +enum VectorMask { + v0_t = 0b0, + unmasked = 0b1 @@ -3159,7 +2685,7 @@ index 000000000..d4da30ed6 + +// ==================================== +// RISC-V Bit-Manipulation Extension -+// Currently only support Zba and Zbb. ++// Currently only support Zba, Zbb and Zbs bitmanip extensions. 
+// ==================================== +#define INSN(NAME, op, funct3, funct7) \ + void NAME(Register Rd, Register Rs1, Register Rs2) { \ @@ -3234,11 +2760,12 @@ index 000000000..d4da30ed6 + + INSN(rori, 0b0010011, 0b101, 0b011000); + INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ INSN(bexti, 0b0010011, 0b101, 0b010010); + +#undef INSN + +#define INSN(NAME, op, funct3, funct7) \ -+ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ + guarantee(shamt <= 0x1f, "Shamt is invalid"); \ + unsigned insn = 0; \ + patch((address)&insn, 6, 0, op); \ @@ -3251,9 +2778,966 @@ index 000000000..d4da30ed6 + } + + INSN(roriw, 0b0011011, 0b101, 0b0110000); -+ ++ +#undef INSN + ++// ======================================== ++// RISC-V Compressed Instructions Extension ++// ======================================== ++// Note: ++// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be ++// transformed to 16-bit instructions if compressible. ++// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li', ++// but most of time we have no need to explicitly use these instructions. ++// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range ++// are qualified to be compressed with their 2-byte versions. ++// An example: ++// ++// CompressibleRegion cr(_masm); ++// __ andr(...); // this instruction could change to c.and if able to ++// ++// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from ++// normal ones. ++// ++ ++private: ++ bool _in_compressible_region; ++public: ++ bool in_compressible_region() const { return _in_compressible_region; } ++ void set_in_compressible_region(bool b) { _in_compressible_region = b; } ++public: ++ ++ // a compressible region ++ class CompressibleRegion : public StackObj { ++ protected: ++ Assembler *_masm; ++ bool _saved_in_compressible_region; ++ public: ++ CompressibleRegion(Assembler *_masm) ++ : _masm(_masm) ++ , _saved_in_compressible_region(_masm->in_compressible_region()) { ++ _masm->set_in_compressible_region(true); ++ } ++ ~CompressibleRegion() { ++ _masm->set_in_compressible_region(_saved_in_compressible_region); ++ } ++ }; ++ ++ // patch a 16-bit instruction. 
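The notes above make compression an explicit, scoped opt-in rather than a global rewrite. A hedged usage sketch, assuming UseRVC is enabled; the emit_masked_sum helper, the __ shorthand for _masm-> and the register constraints quoted in the comments are illustrative, not code from this patch:

    #define __ _masm->

    // While the region object is live, every instruction emitted through the
    // assembler may be replaced by its 2-byte RVC form when the operands
    // allow it; otherwise the ordinary 4-byte encoding is produced.
    static void emit_masked_sum(Assembler* _masm, Register rd, Register rs1, Register rs2) {
      Assembler::CompressibleRegion cr(_masm);
      __ add(rd, rs1, rs2);   // c.add when rd aliases rs1 or rs2 and neither source is x0
      __ andr(rd, rd, rs2);   // c.and when both registers fall in x8..x15
    }

    #undef __

The c_patch helpers that follow fill the individual bit fields of those 16-bit encodings.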
++ static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) { ++ assert_cond(a != NULL); ++ assert_cond(msb >= lsb && msb <= 15); ++ unsigned nbits = msb - lsb + 1; ++ guarantee(val < (1U << nbits), "Field too big for insn"); ++ uint16_t mask = (1U << nbits) - 1; ++ val <<= lsb; ++ mask <<= lsb; ++ uint16_t target = *(uint16_t *)a; ++ target &= ~mask; ++ target |= val; ++ *(uint16_t *)a = target; ++ } ++ ++ static void c_patch(address a, unsigned bit, uint16_t val) { ++ c_patch(a, bit, bit, val); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a general purpose register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, Register reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [0, 31] (5 bits) ++ static void c_patch_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 4, lsb, reg->encoding_nocheck()); ++ } ++ ++ // patch a 16-bit instruction with a float register ranging [8, 15] (3 bits) ++ static void c_patch_compressed_reg(address a, unsigned lsb, FloatRegister reg) { ++ c_patch(a, lsb + 2, lsb, reg->compressed_encoding_nocheck()); ++ } ++ ++// -------------- RVC Instruction Definitions -------------- ++ ++ void c_nop() { ++ c_addi(x0, 0); ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi, 0b000, 0b01); ++ INSN(c_addiw, 0b001, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 10, 0)); \ ++ assert_cond((imm & 0b1111) == 0); \ ++ assert_cond(imm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(9)) >> 7); \ ++ c_patch((address)&insn, 5, 5, (imm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (imm & nth_bit(4)) >> 4); \ ++ c_patch_reg((address)&insn, 7, sp); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(9)) >> 9); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi16sp, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 10, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(uimm != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(3)) >> 3); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch((address)&insn, 10, 7, (uimm & right_n_bits(10)) >> 6); \ ++ c_patch((address)&insn, 12, 11, (uimm & right_n_bits(6)) >> 4); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_addi4spn, 0b000, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void 
NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_slli, 0b000, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, uint32_t shamt) { \ ++ assert_cond(is_unsigned_imm_in_range(shamt, 6, 0)); \ ++ assert_cond(shamt != 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (shamt & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (shamt & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_srli, 0b100, 0b00, 0b01); ++ INSN(c_srai, 0b100, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, funct2, op) \ ++ void NAME(Register Rd_Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 11, 10, funct2); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_andi, 0b100, 0b10, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct6, funct2, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 6, 5, funct2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 10, funct6); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sub, 0b100011, 0b00, 0b01); ++ INSN(c_xor, 0b100011, 0b01, 0b01); ++ INSN(c_or, 0b100011, 0b10, 0b01); ++ INSN(c_and, 0b100011, 0b11, 0b01); ++ INSN(c_subw, 0b100111, 0b00, 0b01); ++ INSN(c_addw, 0b100111, 0b01, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rd_Rs1, Register Rs2) { \ ++ assert_cond(Rd_Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch_reg((address)&insn, 7, Rd_Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_mv, 0b1000, 0b10); ++ INSN(c_add, 0b1001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct4, op) \ ++ void NAME(Register Rs1) { \ ++ assert_cond(Rs1 != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, x0); \ ++ c_patch_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 15, 12, funct4); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_jr, 0b1000, 0b10); ++ INSN(c_jalr, 0b1001, 0b10); ++ ++#undef INSN ++ ++ typedef void (Assembler::* j_c_insn)(address dest); ++ typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest); ++ ++ void wrap_label(Label &L, j_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(pc()); ++ } ++ } ++ ++ void wrap_label(Label &L, Register r, 
compare_and_branch_c_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(r, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r, pc()); ++ } ++ } ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(int32_t offset) { \ ++ assert_cond(is_imm_in_range(offset, 11, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1); \ ++ c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7); \ ++ c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10); \ ++ c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8); \ ++ c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4); \ ++ c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 11, 1)); \ ++ c_j(distance); \ ++ } \ ++ void NAME(Label &L) { \ ++ wrap_label(L, &Assembler::NAME); \ ++ } ++ ++ INSN(c_j, 0b101, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs1, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 8, 1)); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 2, 2, (imm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 4, 3, (imm & right_n_bits(3)) >> 1); \ ++ c_patch((address)&insn, 6, 5, (imm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 11, 10, (imm & right_n_bits(5)) >> 3); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(8)) >> 8); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } \ ++ void NAME(Register Rs1, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ assert_cond(is_imm_in_range(distance, 8, 1)); \ ++ NAME(Rs1, distance); \ ++ } \ ++ void NAME(Register Rs1, Label &L) { \ ++ wrap_label(L, Rs1, &Assembler::NAME); \ ++ } ++ ++ INSN(c_beqz, 0b110, 0b01); ++ INSN(c_bnez, 0b111, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 18, 0)); \ ++ assert_cond((imm & 0xfff) == 0); \ ++ assert_cond(imm != 0); \ ++ assert_cond(Rd != x0 && Rd != x2); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(17)) >> 12); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & nth_bit(17)) >> 17); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lui, 0b011, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ assert_cond(is_imm_in_range(imm, 6, 0)); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 6, 2, (imm & right_n_bits(5))); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (imm & right_n_bits(6)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_li, 0b010, 0b01); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 
0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ldsp, 0b011, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(FloatRegister Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 4, 2, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(5)) >> 3); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_fldsp, 0b001, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 6, 5, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ld, 0b011, 0b00, Register); ++ INSN(c_sd, 0b111, 0b00, Register); ++ INSN(c_fld, 0b001, 0b00, FloatRegister); ++ INSN(c_fsd, 0b101, 0b00, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op, REGISTER_TYPE) \ ++ void NAME(REGISTER_TYPE Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 9, 0)); \ ++ assert_cond((uimm & 0b111) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 9, 7, (uimm & right_n_bits(9)) >> 6); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_sdsp, 0b111, 0b10, Register); ++ INSN(c_fsdsp, 0b101, 0b10, FloatRegister); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rs2, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_reg((address)&insn, 2, Rs2); \ ++ c_patch((address)&insn, 8, 7, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 12, 9, (uimm & right_n_bits(6)) >> 2); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_swsp, 0b110, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 8, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ assert_cond(Rd != x0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 3, 2, (uimm & right_n_bits(8)) >> 6); \ ++ c_patch((address)&insn, 6, 4, (uimm & right_n_bits(5)) >> 2); \ ++ c_patch_reg((address)&insn, 7, Rd); \ ++ c_patch((address)&insn, 12, 12, (uimm & nth_bit(5)) >> 5); \ ++ c_patch((address)&insn, 
15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lwsp, 0b010, 0b10); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME(Register Rd_Rs2, Register Rs1, uint32_t uimm) { \ ++ assert_cond(is_unsigned_imm_in_range(uimm, 7, 0)); \ ++ assert_cond((uimm & 0b11) == 0); \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch_compressed_reg((address)&insn, 2, Rd_Rs2); \ ++ c_patch((address)&insn, 5, 5, (uimm & nth_bit(6)) >> 6); \ ++ c_patch((address)&insn, 6, 6, (uimm & nth_bit(2)) >> 2); \ ++ c_patch_compressed_reg((address)&insn, 7, Rs1); \ ++ c_patch((address)&insn, 12, 10, (uimm & right_n_bits(6)) >> 3); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_lw, 0b010, 0b00); ++ INSN(c_sw, 0b110, 0b00); ++ ++#undef INSN ++ ++#define INSN(NAME, funct3, op) \ ++ void NAME() { \ ++ uint16_t insn = 0; \ ++ c_patch((address)&insn, 1, 0, op); \ ++ c_patch((address)&insn, 11, 2, 0x0); \ ++ c_patch((address)&insn, 12, 12, 0b1); \ ++ c_patch((address)&insn, 15, 13, funct3); \ ++ emit_int16(insn); \ ++ } ++ ++ INSN(c_ebreak, 0b100, 0b10); ++ ++#undef INSN ++ ++// -------------- RVC Transformation Functions -------------- ++ ++// -------------------------- ++// Register instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* add -> c.add */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1 != x0 && Rs2 != x0 && ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ c_add(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ _add(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(add); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* sub/subw -> c.sub/c.subw */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && Rs2->is_compressed_valid())) { \ ++ C_NAME(Rd, Rs2); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(sub, c_sub, _sub); ++ INSN(subw, c_subw, _subw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ /* and/or/xor/addw -> c.and/c.or/c.xor/c.addw */ \ ++ if (do_compress()) { \ ++ Register src = noreg; \ ++ if (Rs1->is_compressed_valid() && Rs2->is_compressed_valid() && \ ++ ((src = Rs1, Rs2 == Rd) || (src = Rs2, Rs1 == Rd))) { \ ++ C_NAME(Rd, src); \ ++ return; \ ++ } \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, Rs2); \ ++ } ++ ++ INSN(andr, c_and, _andr); ++ INSN(orr, c_or, _orr); ++ INSN(xorr, c_xor, _xorr); ++ INSN(addw, c_addw, _addw); ++ ++#undef INSN ++ ++private: ++// some helper functions ++ bool do_compress() const { ++ return UseRVC && in_compressible_region(); ++ } ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, Register rd_rs2, int32_t imm12, bool ld) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0 && \ ++ (!ld || rd_rs2 != x0); \ ++ } \ ++ ++ FUNC(is_c_ldsdsp, 0b111, 9); ++ FUNC(is_c_lwswsp, 0b011, 8); ++ ++#undef FUNC ++ ++#define FUNC(NAME, funct3, bits) \ ++ bool NAME(Register rs1, int32_t imm12) { \ ++ return rs1 == sp && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_fldsdsp, 0b111, 9); ++ ++#undef FUNC ++ ++#define FUNC(NAME, REG_TYPE, funct3, bits) \ ++ bool NAME(Register rs1, REG_TYPE rd_rs2, int32_t imm12) { \ ++ return 
rs1->is_compressed_valid() && \ ++ rd_rs2->is_compressed_valid() && \ ++ is_unsigned_imm_in_range(imm12, bits, 0) && \ ++ (intx(imm12) & funct3) == 0x0; \ ++ } \ ++ ++ FUNC(is_c_ldsd, Register, 0b111, 8); ++ FUNC(is_c_lwsw, Register, 0b011, 7); ++ FUNC(is_c_fldsd, FloatRegister, 0b111, 8); ++ ++#undef FUNC ++ ++public: ++// -------------------------- ++// Load/store register ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* lw -> c.lwsp/c.lw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, true)) { \ ++ c_lwsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_lw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _lw(Rd, Rs, offset); \ ++ } ++ ++ INSN(lw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* ld -> c.ldsp/c.ld */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, true)) { \ ++ c_ldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_ld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _ld(Rd, Rs, offset); \ ++ } ++ ++ INSN(ld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fld -> c.fldsp/c.fld */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fldsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fld(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fld(Rd, Rs, offset); \ ++ } ++ ++ INSN(fld); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sd -> c.sdsp/c.sd */ \ ++ if (do_compress()) { \ ++ if (is_c_ldsdsp(Rs, Rd, offset, false)) { \ ++ c_sdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_ldsd(Rs, Rd, offset)) { \ ++ c_sd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sd(Rd, Rs, offset); \ ++ } ++ ++ INSN(sd); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* sw -> c.swsp/c.sw */ \ ++ if (do_compress()) { \ ++ if (is_c_lwswsp(Rs, Rd, offset, false)) { \ ++ c_swsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_lwsw(Rs, Rd, offset)) { \ ++ c_sw(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _sw(Rd, Rs, offset); \ ++ } ++ ++ INSN(sw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ /* fsd -> c.fsdsp/c.fsd */ \ ++ if (do_compress()) { \ ++ if (is_c_fldsdsp(Rs, offset)) { \ ++ c_fsdsp(Rd, offset); \ ++ return; \ ++ } else if (is_c_fldsd(Rs, Rd, offset)) { \ ++ c_fsd(Rd, Rs, offset); \ ++ return; \ ++ } \ ++ } \ ++ _fsd(Rd, Rs, offset); \ ++ } ++ ++ INSN(fsd); ++ ++#undef INSN ++ ++// -------------------------- ++// Conditional branch instructions ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ /* beq/bne -> c.beqz/c.bnez */ \ ++ if (do_compress() && \ ++ (offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \ ++ is_imm_in_range(offset, 8, 1))) { \ ++ C_NAME(Rs1, offset); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rs1, Rs2, offset); \ ++ } ++ ++ INSN(beq, c_beqz, _beq); ++ INSN(bne, c_beqz, _bne); ++ ++#undef INSN ++ ++// -------------------------- ++// Unconditional branch instructions 
++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ /* jal -> c.j */ \ ++ if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \ ++ c_j(offset); \ ++ return; \ ++ } \ ++ _jal(Rd, offset); \ ++ } ++ ++ INSN(jal); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ /* jalr -> c.jr/c.jalr */ \ ++ if (do_compress() && (offset == 0 && Rs != x0)) { \ ++ if (Rd == x1) { \ ++ c_jalr(Rs); \ ++ return; \ ++ } else if (Rd == x0) { \ ++ c_jr(Rs); \ ++ return; \ ++ } \ ++ } \ ++ _jalr(Rd, Rs, offset); \ ++ } ++ ++ INSN(jalr); ++ ++#undef INSN ++ ++// -------------------------- ++// Miscellaneous Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* ebreak -> c.ebreak */ \ ++ if (do_compress()) { \ ++ c_ebreak(); \ ++ return; \ ++ } \ ++ _ebreak(); \ ++ } ++ ++ INSN(ebreak); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME() { \ ++ /* The illegal instruction in RVC is presented by a 16-bit 0. */ \ ++ if (do_compress()) { \ ++ emit_int16(0); \ ++ return; \ ++ } \ ++ _halt(); \ ++ } ++ ++ INSN(halt); ++ ++#undef INSN ++ ++// -------------------------- ++// Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int64_t imm) { \ ++ /* li -> c.li */ \ ++ if (do_compress() && (is_imm_in_range(imm, 6, 0) && Rd != x0)) { \ ++ c_li(Rd, imm); \ ++ return; \ ++ } \ ++ _li(Rd, imm); \ ++ } ++ ++ INSN(li); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addi -> c.addi/c.nop/c.mv/c.addi16sp/c.addi4spn */ \ ++ if (do_compress()) { \ ++ if (Rd == Rs1 && is_imm_in_range(imm, 6, 0)) { \ ++ c_addi(Rd, imm); \ ++ return; \ ++ } else if (imm == 0 && Rd != x0 && Rs1 != x0) { \ ++ c_mv(Rd, Rs1); \ ++ return; \ ++ } else if (Rs1 == sp && imm != 0) { \ ++ if (Rd == Rs1 && (imm & 0b1111) == 0x0 && is_imm_in_range(imm, 10, 0)) { \ ++ c_addi16sp(imm); \ ++ return; \ ++ } else if (Rd->is_compressed_valid() && (imm & 0b11) == 0x0 && is_unsigned_imm_in_range(imm, 10, 0)) { \ ++ c_addi4spn(Rd, imm); \ ++ return; \ ++ } \ ++ } \ ++ } \ ++ _addi(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addi); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* addiw -> c.addiw */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && is_imm_in_range(imm, 6, 0))) { \ ++ c_addiw(Rd, imm); \ ++ return; \ ++ } \ ++ _addiw(Rd, Rs1, imm); \ ++ } ++ ++ INSN(addiw); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ /* and_imm12 -> c.andi */ \ ++ if (do_compress() && \ ++ (Rd == Rs1 && Rd->is_compressed_valid() && is_imm_in_range(imm, 6, 0))) { \ ++ c_andi(Rd, imm); \ ++ return; \ ++ } \ ++ _and_imm12(Rd, Rs1, imm); \ ++ } ++ ++ INSN(and_imm12); ++ ++#undef INSN ++ ++// -------------------------- ++// Shift Immediate Instructions ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ /* slli -> c.slli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd != x0 && shamt != 0)) { \ ++ c_slli(Rd, shamt); \ ++ return; \ ++ } \ ++ _slli(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(slli); ++ ++#undef INSN ++ ++// -------------------------- ++#define INSN(NAME, C_NAME, NORMAL_NAME) \ ++ void NAME(Register Rd, 
Register Rs1, unsigned shamt) { \ ++ /* srai/srli -> c.srai/c.srli */ \ ++ if (do_compress() && (Rd == Rs1 && Rd->is_compressed_valid() && shamt != 0)) { \ ++ C_NAME(Rd, shamt); \ ++ return; \ ++ } \ ++ NORMAL_NAME(Rd, Rs1, shamt); \ ++ } ++ ++ INSN(srai, c_srai, _srai); ++ INSN(srli, c_srli, _srli); ++ ++#undef INSN ++ ++// -------------------------- ++// Upper Immediate Instruction ++// -------------------------- ++#define INSN(NAME) \ ++ void NAME(Register Rd, int32_t imm) { \ ++ /* lui -> c.lui */ \ ++ if (do_compress() && (Rd != x0 && Rd != x2 && imm != 0 && is_imm_in_range(imm, 18, 0))) { \ ++ c_lui(Rd, imm); \ ++ return; \ ++ } \ ++ _lui(Rd, imm); \ ++ } ++ ++ INSN(lui); ++ ++#undef INSN ++ ++// --------------------------------------------------------------------------------------- ++ + void bgt(Register Rs, Register Rt, const address &dest); + void ble(Register Rs, Register Rt, const address &dest); + void bgtu(Register Rs, Register Rt, const address &dest); @@ -3273,18 +3757,17 @@ index 000000000..d4da30ed6 + void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); + void wrap_label(Register r, Label &L, jal_jalr_insn insn); + -+ // Computational pseudo instructions ++ // calculate pseudoinstruction + void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); -+ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); -+ ++ void addw(Register Rd, Register Rn, int64_t increment, Register temp = t0); + void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); -+ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int64_t decrement, Register temp = t0); + + // RVB pseudo instructions + // zero extend word + void zext_w(Register Rd, Register Rs); + -+ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++ Assembler(CodeBuffer* code) : AbstractAssembler(code), _in_compressible_region(false) { + } + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, @@ -3301,21 +3784,14 @@ index 000000000..d4da30ed6 + return is_imm_in_range(imm, 12, 0); + } + -+ // The maximum range of a branch is fixed for the riscv -+ // architecture. ++ // The maximum range of a branch is fixed for the RISCV architecture. + static const unsigned long branch_range = 1 * M; + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + -+ static Assembler::SEW elemBytes_to_sew(int esize) { -+ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); -+ return (Assembler::SEW) exact_log2(esize); -+ } -+ + virtual ~Assembler() {} -+ +}; + +class BiasedLockingCounters; @@ -3323,12 +3799,12 @@ index 000000000..d4da30ed6 +#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp new file mode 100644 -index 000000000..82b825db7 +index 0000000000..7ffe880398 --- /dev/null +++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp @@ -0,0 +1,47 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
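The assembler hunks above all follow the same shape: when do_compress() is on, each mnemonic first checks whether its operands and immediate satisfy the constraints of an RVC 16-bit encoding — registers limited to x8..x15 for the non-SP forms, and a small zero-extended offset with the required alignment — and emits the compressed form, falling back to the ordinary 32-bit encoding otherwise. A minimal standalone sketch of that selection logic follows; the helper names (is_compressed_reg, fits_uimm, emit_lw) are hypothetical placeholders, not HotSpot APIs.

#include <cstdint>
#include <cstdio>

// RVC load/store forms other than the SP-relative ones can only name x8..x15.
static bool is_compressed_reg(int xreg) {
  return xreg >= 8 && xreg <= 15;
}

// Unsigned immediate that fits in `bits` bits with the required low bits clear
// (4-byte alignment for c.lw, 8-byte alignment for c.ld).
static bool fits_uimm(int64_t imm, int bits, int64_t align_mask) {
  return imm >= 0 && imm < (int64_t(1) << bits) && (imm & align_mask) == 0;
}

// lw rd, offset(rs): use the 16-bit c.lw when every constraint holds, otherwise
// fall back to the 32-bit lw.  Printing stands in for instruction emission.
static void emit_lw(int rd, int rs, int64_t offset, bool do_compress) {
  if (do_compress &&
      is_compressed_reg(rd) && is_compressed_reg(rs) &&
      fits_uimm(offset, 7, 0x3)) {
    std::printf("c.lw x%d, %lld(x%d)\n", rd, (long long)offset, rs);
  } else {
    std::printf("lw   x%d, %lld(x%d)\n", rd, (long long)offset, rs);
  }
}

int main() {
  emit_lw(9, 8, 16, true);   // all constraints met       -> c.lw
  emit_lw(5, 8, 16, true);   // x5 is outside x8..x15     -> lw
  emit_lw(9, 8, 18, true);   // offset not 4-byte aligned -> lw
  return 0;
}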
@@ -3376,14 +3852,14 @@ index 000000000..82b825db7 +#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp new file mode 100644 -index 000000000..d0ac7ef46 +index 0000000000..f60e0e38ae --- /dev/null +++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp -@@ -0,0 +1,169 @@ +@@ -0,0 +1,165 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -3409,8 +3885,6 @@ index 000000000..d0ac7ef46 +#ifndef CPU_RISCV_BYTES_RISCV_HPP +#define CPU_RISCV_BYTES_RISCV_HPP + -+#include "memory/allocation.hpp" -+ +class Bytes: AllStatic { + public: + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering @@ -3457,7 +3931,6 @@ index 000000000..d0ac7ef46 + ((u8)(((u4*)p)[0])); + + case 2: -+ case 6: + return ((u8)(((u2*)p)[3]) << 48) | + ((u8)(((u2*)p)[2]) << 32) | + ((u8)(((u2*)p)[1]) << 16) | @@ -3471,7 +3944,7 @@ index 000000000..d0ac7ef46 + ((u8)(p[3]) << 24) | + ((u8)(p[2]) << 16) | + ((u8)(p[1]) << 8) | -+ (u8)(p[0]); ++ ((u8)(p[0])); + } + } + @@ -3516,7 +3989,6 @@ index 000000000..d0ac7ef46 + break; + + case 2: -+ case 6: + ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; @@ -3546,17 +4018,17 @@ index 000000000..d0ac7ef46 + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } +}; + -+#include OS_CPU_HEADER_INLINE(bytes) ++#include OS_CPU_HEADER(bytes) + +#endif // CPU_RISCV_BYTES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp new file mode 100644 -index 000000000..522eedd29 +index 0000000000..12980c12de --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp -@@ -0,0 +1,352 @@ +@@ -0,0 +1,339 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
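The Bytes::get_native_u8 hunk above handles possibly misaligned reads by dispatching on the low bits of the address and assembling the value from naturally aligned narrower loads, since unaligned accesses may be slow or unsupported on RISC-V hardware. Below is a standalone sketch of the same idea, assuming a little-endian target (as on RISC-V); the function name is hypothetical and, like the HotSpot code it mirrors, it relies on the compiler tolerating the type-punning casts.

#include <cassert>
#include <cstdint>
#include <cstring>

// Read a 64-bit value from a possibly misaligned address by dispatching on the
// address's low bits and combining naturally aligned narrower loads (little-endian).
static uint64_t read_u8_unaligned(const unsigned char* p) {
  switch (reinterpret_cast<uintptr_t>(p) & 7) {
    case 0:    // 8-byte aligned: one 64-bit load
      return *reinterpret_cast<const uint64_t*>(p);
    case 4:    // 4-byte aligned: two 32-bit loads
      return (uint64_t(reinterpret_cast<const uint32_t*>(p)[1]) << 32) |
              uint64_t(reinterpret_cast<const uint32_t*>(p)[0]);
    case 2:    // 2-byte aligned: four 16-bit loads
      return (uint64_t(reinterpret_cast<const uint16_t*>(p)[3]) << 48) |
             (uint64_t(reinterpret_cast<const uint16_t*>(p)[2]) << 32) |
             (uint64_t(reinterpret_cast<const uint16_t*>(p)[1]) << 16) |
              uint64_t(reinterpret_cast<const uint16_t*>(p)[0]);
    default: { // any other residue: plain byte loads
      uint64_t v = 0;
      for (int i = 7; i >= 0; --i) {
        v = (v << 8) | p[i];
      }
      return v;
    }
  }
}

int main() {
  alignas(8) unsigned char buf[16] = {0};
  const uint64_t x = 0x1122334455667788ULL;
  std::memcpy(buf + 3, &x, sizeof(x));       // deliberately misaligned store
  assert(read_u8_unaligned(buf + 3) == x);   // odd address -> byte loads
  std::memcpy(buf + 4, &x, sizeof(x));
  assert(read_u8_unaligned(buf + 4) == x);   // 4-aligned   -> two 32-bit loads
  return 0;
}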
@@ -3588,6 +4060,7 @@ index 000000000..522eedd29 +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" ++#include "classfile/javaClasses.hpp" +#include "nativeInst_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" @@ -3595,8 +4068,7 @@ index 000000000..522eedd29 + +#define __ ce->masm()-> + -+void CounterOverflowStub::emit_code(LIR_Assembler* ce) -+{ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(t0, m); @@ -3608,22 +4080,19 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) -+ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) -+{ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) -+ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) -+{ ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + -+void RangeCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); @@ -3655,13 +4124,11 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) -+{ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + -+void PredicateFailedStub::emit_code(LIR_Assembler* ce) -+{ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); @@ -3670,8 +4137,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void DivByZeroStub::emit_code(LIR_Assembler* ce) -+{ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } @@ -3685,21 +4151,19 @@ index 000000000..522eedd29 +} + +// Implementation of NewInstanceStub -+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) -+{ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); -+ assert(stub_id == Runtime1::new_instance_id || -+ stub_id == Runtime1::fast_new_instance_id || ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + -+void NewInstanceStub::emit_code(LIR_Assembler* ce) -+{ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mv(x13, _klass_reg->as_register()); @@ -3711,16 +4175,14 @@ index 000000000..522eedd29 +} + +// Implementation 
of NewTypeArrayStub -+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + -+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3733,16 +4195,14 @@ index 000000000..522eedd29 +} + +// Implementation of NewObjectArrayStub -+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) -+{ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + -+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) -+{ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == x9, "length must in x9"); @@ -3756,13 +4216,11 @@ index 000000000..522eedd29 + +// Implementation of MonitorAccessStubs +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) -+: MonitorAccessStub(obj_reg, lock_reg) -+{ ++: MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + -+void MonitorEnterStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); @@ -3779,8 +4237,7 @@ index 000000000..522eedd29 + __ j(_continuation); +} + -+void MonitorExitStub::emit_code(LIR_Assembler* ce) -+{ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it @@ -3798,18 +4255,23 @@ index 000000000..522eedd29 + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) {} + -+// RISCV don't use C1 runtime patching. When need patch, just deoptimize. 
-+void PatchingStub::emit_code(LIR_Assembler* ce) -+{ ++void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "RISCV should not use C1 runtime patching"); +} + -+void DeoptimizeStub::emit_code(LIR_Assembler* ce) -+{ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); @@ -3817,8 +4279,7 @@ index 000000000..522eedd29 + DEBUG_ONLY(__ should_not_reach_here()); +} + -+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) -+{ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a = NULL; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. @@ -3835,8 +4296,7 @@ index 000000000..522eedd29 + debug_only(__ should_not_reach_here()); +} + -+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) -+{ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); @@ -3845,19 +4305,17 @@ index 000000000..522eedd29 + if (_obj->is_cpu_register()) { + __ mv(t0, _obj->as_register()); + } -+ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, t1); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + -+void ArrayCopyStub::emit_code(LIR_Assembler* ce) -+{ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + // ---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. -+ // + const int args_num = 5; + VMRegPair args[args_num]; + BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; @@ -3865,12 +4323,11 @@ index 000000000..522eedd29 + + // push parameters + Register r[args_num]; -+ int i = 0; -+ r[i++] = src()->as_register(); -+ r[i++] = src_pos()->as_register(); -+ r[i++] = dst()->as_register(); -+ r[i++] = dst_pos()->as_register(); -+ r[i++] = length()->as_register(); ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int j = 0; j < args_num; j++) { @@ -3879,7 +4336,7 @@ index 000000000..522eedd29 + int st_off = r_1->reg2stack() * wordSize; + __ sd(r[j], Address(sp, st_off)); + } else { -+ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg"); + } + } + @@ -3899,8 +4356,10 @@ index 000000000..522eedd29 + ce->add_call_info_here(info()); + +#ifndef PRODUCT -+ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); -+ __ incrementw(Address(t1)); ++ if (PrintC1Statistics) { ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ add_memory_int32(Address(t1), 1); ++ } +#endif + + __ j(_continuation); @@ -3909,13 +4368,12 @@ index 000000000..522eedd29 +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp new file mode 100644 -index 000000000..a0f411352 +index 0000000000..4417ad6309 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,84 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its 
affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -3973,7 +4431,7 @@ index 000000000..a0f411352 + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan -+ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, @@ -4000,13 +4458,12 @@ index 000000000..a0f411352 +#endif // CPU_RISCV_C1_DEFS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp new file mode 100644 -index 000000000..d4876625c +index 0000000000..e3a2606c53 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp -@@ -0,0 +1,31 @@ +@@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4037,13 +4494,12 @@ index 000000000..d4876625c +// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp new file mode 100644 -index 000000000..4b43bc4d7 +index 0000000000..7bc3d31150 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp -@@ -0,0 +1,33 @@ +@@ -0,0 +1,32 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4076,13 +4532,12 @@ index 000000000..4b43bc4d7 +#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp new file mode 100644 -index 000000000..94b4e0f0b +index 0000000000..682ebe8262 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp -@@ -0,0 +1,391 @@ +@@ -0,0 +1,388 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4112,8 +4567,7 @@ index 000000000..94b4e0f0b +#include "runtime/sharedRuntime.hpp" +#include "vmreg_riscv.inline.hpp" + -+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) -+{ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); @@ -4129,7 +4583,7 @@ index 000000000..94b4e0f0b + Register reg2 = r_2->as_Register(); + assert(reg2 == reg1, "must be same register"); + opr = as_long_opr(reg1); -+ } else if (type == T_OBJECT || type == T_ARRAY) { ++ } else if (is_reference_type(type)) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); @@ -4413,7 +4867,7 @@ index 000000000..94b4e0f0b + + +// ----------------mapping----------------------- -+// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + + @@ -4430,7 +4884,7 @@ index 000000000..94b4e0f0b +// | .........| <- TOS +// | locals | +// +----------+ -+// | old fp, | ++// | old fp, | +// +----------+ +// | ret addr | +// +----------+ @@ -4458,8 +4912,7 @@ index 000000000..94b4e0f0b + return as_FloatRegister(n)->as_VMReg(); +} + -+LIR_Opr FrameMap::stack_pointer() -+{ ++LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + @@ -4473,13 +4926,12 @@ index 000000000..94b4e0f0b +} diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp new file mode 100644 -index 000000000..f600c2f6f +index 0000000000..01281f5c9e --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,148 @@ +/* -+ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4628,13 +5080,12 @@ index 000000000..f600c2f6f +#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp new file mode 100644 -index 000000000..a846d60ae +index 0000000000..2a99d49c94 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp -@@ -0,0 +1,287 @@ +@@ -0,0 +1,285 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -4671,16 +5122,15 @@ index 000000000..a846d60ae + +#define __ _masm-> + -+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { -+ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, ++ LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); -+ -+ // operand check -+ assert(left->is_single_cpu(), "left must be register"); -+ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); -+ assert(result->is_single_cpu(), "result must be register"); ++ // opreand check ++ assert(left->is_single_cpu(), "left must be a register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be a register or constant"); ++ assert(result->is_single_cpu(), "result must be a register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + @@ -4754,7 +5204,7 @@ index 000000000..a846d60ae + case lir_sub: __ subw(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + case T_OBJECT: // fall through + case T_ADDRESS: + switch (code) { @@ -4762,7 +5212,7 @@ index 000000000..a846d60ae + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } -+ break; ++ break; + default: + ShouldNotReachHere(); + } @@ -4817,7 +5267,7 @@ index 000000000..a846d60ae + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { -+ case lir_add: ++ case lir_add: // fall through + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); @@ -4826,12 +5276,12 @@ index 000000000..a846d60ae + code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); + break; + case lir_div: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ mv(dreg, lreg_lo); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + // use t0 as intermediate result register + __ srai(t0, lreg_lo, 0x3f); + if (is_imm_in_range(c - 1, 12, 0)) { @@ -4844,12 +5294,12 @@ index 000000000..a846d60ae + } + break; + case lir_rem: -+ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ mv(dreg, zr); + } else { -+ unsigned int shift = exact_log2(c); ++ unsigned int shift = exact_log2_long(c); + __ srai(t0, lreg_lo, 0x3f); + __ srli(t0, t0, BitsPerLong - shift); + __ add(t1, lreg_lo, t0); @@ -4874,9 +5324,9 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_mul_strictfp: // fall through ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; -+ case lir_div_strictfp: // fall through ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); @@ -4889,9 +5339,9 @@ index 000000000..a846d60ae + switch (code) { + case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_mul_strictfp: // fall through ++ case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; -+ case lir_div_strictfp: // fall through ++ case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); @@ -4921,13 +5371,12 @@ index 000000000..a846d60ae +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp new file mode 100644 -index 000000000..93530ef58 +index 0000000000..ab0a9963fc --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp -@@ -0,0 +1,36 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -4950,6 +5399,7 @@ index 000000000..93530ef58 + * questions. 
+ * + */ ++ +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP + @@ -4960,17 +5410,17 @@ index 000000000..93530ef58 + void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); + void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); + void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp new file mode 100644 -index 000000000..31f8d6a4a +index 0000000000..b7f53e395f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp -@@ -0,0 +1,387 @@ +@@ -0,0 +1,388 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -4999,6 +5449,7 @@ index 000000000..31f8d6a4a +#include "c1/c1_MacroAssembler.hpp" +#include "ci/ciArrayKlass.hpp" +#include "oops/objArrayKlass.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -5026,7 +5477,7 @@ index 000000000..31f8d6a4a + __ mv(c_rarg4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), 1); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); @@ -5064,14 +5515,14 @@ index 000000000..31f8d6a4a + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); -+ __ mv(t1, Klass::_lh_neutral_value); ++ __ li(t1, Klass::_lh_neutral_value); + __ bge(t0, t1, *stub->entry(), /* is_far */ true); + } + } @@ -5133,7 +5584,7 @@ index 000000000..31f8d6a4a + if (PrintC1Statistics) { + Label failed; + __ bnez(x10, failed); -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt), 1); + __ bind(failed); + } +#endif @@ -5142,7 +5593,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ __ add_memory_int32(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt), 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); @@ -5214,6 +5665,7 @@ index 000000000..31f8d6a4a +void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { + assert(default_type != NULL, "NULL default_type!"); + BasicType basic_type = default_type->element_type()->basic_type(); ++ + if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } + if (basic_type != T_OBJECT || 
!(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the @@ -5269,7 +5721,7 @@ index 000000000..31f8d6a4a + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; -+ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (is_reference_type(basic_type)) { basic_type = T_OBJECT; } + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { @@ -5292,7 +5744,7 @@ index 000000000..31f8d6a4a + +#ifndef PRODUCT + if (PrintC1Statistics) { -+ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ __ add_memory_int32(ExternalAddress(Runtime1::arraycopy_count_address(basic_type)), 1); + } +#endif + arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); @@ -5356,13 +5808,12 @@ index 000000000..31f8d6a4a +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp new file mode 100644 -index 000000000..872fd2ef6 +index 0000000000..06a0f248ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -5388,6 +5839,7 @@ index 000000000..872fd2ef6 + +#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ + // arraycopy sub functions + void generic_arraycopy(Register src, Register src_pos, Register length, + Register dst, Register dst_pos, CodeStub *stub); @@ -5410,17 +5862,18 @@ index 000000000..872fd2ef6 + Register dst, Register dst_pos); + void arraycopy_load_args(Register src, Register src_pos, Register length, + Register dst, Register dst_pos); ++ +#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp new file mode 100644 -index 000000000..222e3e97e +index 0000000000..1e482d7cc2 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp -@@ -0,0 +1,2275 @@ +@@ -0,0 +1,2268 @@ +/* + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -5455,14 +5908,11 @@ index 000000000..222e3e97e +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" -+#include "gc/shared/barrierSet.hpp" -+#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" -+#include "utilities/macros.hpp" +#include "vmreg_riscv.inline.hpp" + +#ifndef PRODUCT @@ -5512,7 +5962,6 @@ index 000000000..222e3e97e + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + -+ +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} @@ -5521,25 +5970,11 @@ index 000000000..222e3e97e + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + -+//--------------fpu register translations----------------------- -+void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::reset_FPU() { Unimplemented(); } -+ -+void LIR_Assembler::fpop() { Unimplemented(); } -+ -+void LIR_Assembler::fxch(int i) { Unimplemented(); } -+ -+void LIR_Assembler::fld(int i) { Unimplemented(); } -+ -+void LIR_Assembler::ffree(int i) { Unimplemented(); } -+ +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } -+//------------------------------------------- + +static jlong as_long(LIR_Opr data) { + jlong result; @@ -5557,6 +5992,43 @@ index 000000000..222e3e97e + return result; +} + ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_opr = addr->index(); ++ ++ if (index_opr->is_illegal()) { ++ return Address(base, addr->disp()); ++ } ++ ++ int scale = addr->scale(); ++ if (index_opr->is_cpu_register()) { ++ Register index; ++ if (index_opr->is_single_cpu()) { ++ index = index_opr->as_register(); ++ } else { ++ index = index_opr->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_opr->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_opr->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); @@ -5572,7 +6044,7 @@ index 000000000..222e3e97e + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address -+// calculation to hold the address in a temporary register. ++// calculation to hold the address in t0. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); @@ -5690,10 +6162,7 @@ index 000000000..222e3e97e +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! 
+ -+ // The frame_map records size in slots (32bit word) -+ -+ // subtract two words to account for return address and link -+ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { @@ -6007,7 +6476,7 @@ index 000000000..222e3e97e + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { -+ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ if (is_reference_type(src->type())) { + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; @@ -6064,8 +6533,7 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src + Register compressed_src = t1; @@ -6075,7 +6543,7 @@ index 000000000..222e3e97e + return; + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { @@ -6187,8 +6655,7 @@ index 000000000..222e3e97e + reg2stack(temp, dest, dest->type(), false); +} + -+void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, -+ bool wide, bool /* unaligned */) { ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + assert(src->is_address(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + @@ -6233,6 +6700,9 @@ index 000000000..222e3e97e + __ ld(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. 
+ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ lwu(dest->as_register(), as_Address(from_addr)); + } else { @@ -6261,7 +6731,7 @@ index 000000000..222e3e97e + ShouldNotReachHere(); + } + -+ if (type == T_ARRAY || type == T_OBJECT) { ++ if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } @@ -6275,7 +6745,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { -+ case lir_idiv: ++ case lir_idiv: // fall through + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), @@ -6431,8 +6901,8 @@ index 000000000..222e3e97e + Register len = op->len()->as_register(); + + if (UseSlowPath || -+ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || -+ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ (!UseFastNewObjectArray && is_reference_type(op->type())) || ++ (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ j(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); @@ -6467,7 +6937,7 @@ index 000000000..222e3e97e + __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ bne(recv, t1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + __ j(*update_done); + __ bind(next_test); + } @@ -6479,7 +6949,7 @@ index 000000000..222e3e97e + __ ld(t1, recv_addr); + __ bnez(t1, next_test); + __ sd(recv, recv_addr); -+ __ mv(t1, DataLayout::counter_increment); ++ __ li(t1, DataLayout::counter_increment); + __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ j(*update_done); + __ bind(next_test); @@ -6505,7 +6975,7 @@ index 000000000..222e3e97e + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit -+ __ ld(t0, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ ld(t0, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne(k_RInfo, t0, *failure_target, /* is_far */ true); + // successful cast, fall through to profile or jump @@ -6550,10 +7020,7 @@ index 000000000..222e3e97e + // Object is null, update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); -+ Address data_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address data_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ lbu(t0, data_addr); + __ ori(t0, t0, BitData::null_seen_byte_constant()); + __ sb(t0, data_addr); @@ -6758,7 +7225,12 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::align_call(LIR_Code code) { } ++void LIR_Assembler::align_call(LIR_Code code) { ++ // With RVC a call instruction may get 2-byte aligned. ++ // The address of the call instruction needs to be 4-byte aligned to ++ // ensure that it does not span a cache line so that it can be patched. 
++ __ align(4); ++} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(Address(op->addr(), rtype)); @@ -6778,10 +7250,14 @@ index 000000000..222e3e97e + add_call_info(code_offset(), op->info()); +} + -+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); ++ assert((__ offset() % 4) == 0, "bad alignment"); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); @@ -6793,7 +7269,8 @@ index 000000000..222e3e97e + __ relocate(static_stub_Relocation::spec(call_pc)); + __ emit_static_call_stub(); + -+ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + @@ -6838,7 +7315,6 @@ index 000000000..222e3e97e + __ j(_unwind_handler_entry); +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6866,7 +7342,6 @@ index 000000000..222e3e97e + } +} + -+ +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); @@ -6901,8 +7376,6 @@ index 000000000..222e3e97e + } +} + -+ -+ +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); @@ -6962,7 +7435,7 @@ index 000000000..222e3e97e + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6978,7 +7451,7 @@ index 000000000..222e3e97e + __ mov_metadata(t1, known_klass->constant_encoding()); + __ sd(t1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); -+ __ increment(data_addr, DataLayout::counter_increment); ++ __ add_memory_int64(data_addr, DataLayout::counter_increment); + return; + } + } @@ -6988,13 +7461,13 @@ index 000000000..222e3e97e + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
-+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call -+ __ increment(counter_addr, DataLayout::counter_increment); ++ __ add_memory_int64(counter_addr, DataLayout::counter_increment); + } +} + @@ -7029,7 +7502,7 @@ index 000000000..222e3e97e + + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(t1, none); -+ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ li(t0, (u1)TypeEntries::null_seen); + __ beq(t0, t1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -7079,7 +7552,7 @@ index 000000000..222e3e97e + Label ok; + __ ld(t0, mdo_addr); + __ beqz(t0, ok); -+ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ li(t1, (u1)TypeEntries::null_seen); + __ beq(t0, t1, ok); + // may have been set by another thread + __ membar(MacroAssembler::LoadLoad); @@ -7205,26 +7678,27 @@ index 000000000..222e3e97e + return; + } +#endif ++ + assert(patch_code == lir_patch_none, "Patch code not supported"); + LIR_Address* adr = addr->as_address_ptr(); + Register dst = dest->as_register_lo(); + + assert_different_registers(dst, t0); -+ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { -+ ++ if (adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && (!adr->index()->is_cpu_register())) { ++ int scale = adr->scale(); + intptr_t offset = adr->disp(); + LIR_Opr index_op = adr->index(); -+ int scale = adr->scale(); -+ if(index_op->is_constant()) { ++ if (index_op->is_constant()) { + offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; + } + -+ if(!is_imm_in_range(offset, 12, 0)) { ++ if (!is_imm_in_range(offset, 12, 0)) { + __ la(t0, as_Address(adr)); + __ mv(dst, t0); + return; + } + } ++ + __ la(dst, as_Address(adr)); +} + @@ -7248,8 +7722,7 @@ index 000000000..222e3e97e + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { -+ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, -+ /* unaligned */ false, /* wide */ false); ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, /*unaligned*/ false, /* wide */ false); + } else { + ShouldNotReachHere(); + } @@ -7326,7 +7799,7 @@ index 000000000..222e3e97e +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + + get_op(type); + @@ -7376,41 +7849,6 @@ index 000000000..222e3e97e + return exact_log2(elem_size); +} + -+Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { -+ if (addr->base()->is_illegal()) { -+ assert(addr->index()->is_illegal(), "must be illegal too"); -+ __ movptr(tmp, addr->disp()); -+ return Address(tmp, 0); -+ } -+ -+ Register base = addr->base()->as_pointer_register(); -+ LIR_Opr index_op = addr->index(); -+ int scale = addr->scale(); -+ -+ if (index_op->is_illegal()) { -+ return Address(base, addr->disp()); -+ } else if (index_op->is_cpu_register()) { -+ Register index; -+ if (index_op->is_single_cpu()) { -+ index = index_op->as_register(); -+ } else { -+ index = index_op->as_register_lo(); -+ } -+ if (scale != 0) { -+ __ shadd(tmp, index, base, tmp, 
scale); -+ } else { -+ __ add(tmp, base, index); -+ } -+ return Address(tmp, addr->disp()); -+ } else if (index_op->is_constant()) { -+ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); -+ return Address(base, addr_offset); -+ } -+ -+ Unimplemented(); -+ return Address(); -+} -+ +// helper functions which checks for overflow and sets bailout if it +// occurs. Always returns a valid embeddable pointer but in the +// bailout case the pointer won't be to unique storage. @@ -7444,15 +7882,17 @@ index 000000000..222e3e97e + } +} + -+void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { -+ _masm->code_section()->relocate(adr, relocInfo::poll_type); -+ int pc_offset = code_offset(); -+ flush_debug_info(pc_offset); -+ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); -+ if (info->exception_handlers() != NULL) { -+ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); -+ } -+} ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { + __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, @@ -7498,7 +7938,6 @@ index 000000000..222e3e97e + add_call_info_here(info); +} + -+ +void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { + Label ok; + __ load_klass(tmp, tmp); @@ -7588,6 +8027,16 @@ index 000000000..222e3e97e + __ bind(done); +} + ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ +void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, + ciProfileData* data, Label* success, Label* failure, + Label& profile_cast_success, Label& profile_cast_failure) { @@ -7602,10 +8051,7 @@ index 000000000..222e3e97e + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); -+ Address counter_addr = __ form_address(mdo, /* base */ -+ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ -+ 12, /* expect offset bits */ -+ t1); /* temp reg */ ++ Address counter_addr = __ form_address(t1, mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld(t0, counter_addr); + __ addi(t0, t0, -DataLayout::counter_increment); + __ sd(t0, counter_addr); @@ -7687,21 +8133,21 @@ index 000000000..222e3e97e + assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); + int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; + assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); -+ __ mv(t0, c); ++ __ li(t0, c); + __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); +} + +#undef __ diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp new file mode 100644 -index 000000000..11a47fd6e +index 
0000000000..5c81f1c704 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,133 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -7743,9 +8189,6 @@ index 000000000..11a47fd6e + + Address as_Address(LIR_Address* addr, Register tmp); + -+ // Ensure we have a valid Address (base+offset) to a stack-slot. -+ Address stack_slot_address(int index, uint shift, int adjust = 0); -+ + // helper functions which checks for overflow and sets bailout if it + // occurs. Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. @@ -7753,6 +8196,9 @@ index 000000000..11a47fd6e + address double_constant(double d); + address int_constant(jlong n); + ++ // Ensure we have a valid Address (base + offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, @@ -7768,17 +8214,16 @@ index 000000000..11a47fd6e + + void deoptimize_trap(CodeEmitInfo *info); + -+ enum -+ { -+ // see emit_static_call_stub for detail: ++ enum { ++ // See emit_static_call_stub for detail + // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) + _call_stub_size = 14 * NativeInstruction::instruction_size + + (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), + _call_aot_stub_size = 0, -+ // see emit_exception_handler for detail: ++ // See emit_exception_handler for detail + // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) + _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller -+ // see emit_deopt_handler for detail ++ // See emit_deopt_handler for detail + // auipc (1) + far_jump (6 or 2) + _deopt_handler_size = 1 * NativeInstruction::instruction_size + + 6 * NativeInstruction::instruction_size // or smaller @@ -7789,10 +8234,12 @@ index 000000000..11a47fd6e + void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); + + void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ + void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); + + void (MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); + void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ + void get_op(BasicType type); + + // emit_typecheck_helper sub functions @@ -7832,12 +8279,12 @@ index 000000000..11a47fd6e +#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp new file mode 100644 -index 000000000..8ba9ed66d +index 0000000000..c41819fc2a --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp -@@ -0,0 +1,1083 @@ +@@ -0,0 +1,1094 @@ +/* -+ * Copyright (c) 2005, 2019, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -7980,7 +8427,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->as_constant() != NULL) { + long constant = 0; @@ -7996,7 +8442,6 @@ index 000000000..8ba9ed66d + return false; +} + -+ +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} @@ -8004,7 +8449,7 @@ index 000000000..8ba9ed66d +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); -+ ++ + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + jlong c; @@ -8031,17 +8476,23 @@ index 000000000..8ba9ed66d + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); -+ + return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ LIR_Opr r; + switch (type) { -+ case T_LONG: return LIR_OprFact::longConst(x); -+ case T_INT: return LIR_OprFact::intConst(x); -+ default: ShouldNotReachHere(); ++ case T_LONG: ++ r = LIR_OprFact::longConst(x); ++ break; ++ case T_INT: ++ r = LIR_OprFact::intConst(x); ++ break; ++ default: ++ ShouldNotReachHere(); ++ r = NULL; + } -+ return NULL; ++ return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { @@ -8111,10 +8562,10 @@ index 000000000..8ba9ed66d + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); -+ // Need a tmp register for biased locking -+ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { -+ tmp = new_register(T_INT); ++ scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; @@ -8124,7 +8575,7 @@ index 000000000..8ba9ed66d + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); -+ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + @@ -8208,7 +8659,7 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap -+ LIRItem left(x->x(), this); ++ LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { @@ -8221,7 +8672,7 @@ index 000000000..8ba9ed66d + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) { need_zero_check = false; } + // do not load right if the divisor is a power-of-2 constant -+ if (c > 0 && is_power_of_2(c)) { ++ if (c > 0 && is_power_of_2_long(c)) { + right.dont_load_item(); + } else { + right.load_item(); @@ -8232,7 +8683,7 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right.result(), 
LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + } + + rlock_result(x); @@ -8306,16 +8757,16 @@ index 000000000..8ba9ed66d + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); -+ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; -+ + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } ++ + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || @@ -8389,7 +8840,7 @@ index 000000000..8ba9ed66d + left.load_item(); + rlock_result(x); + ValueTag tag = right.type()->tag(); -+ if(right.is_constant() && ++ if (right.is_constant() && + ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || + (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { + right.dont_load_item(); @@ -8438,7 +8889,7 @@ index 000000000..8ba9ed66d + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); -+ if (type == T_OBJECT || type == T_ARRAY) { ++ if (is_reference_type(type)) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); + } else if (type == T_INT) { + __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); @@ -8452,7 +8903,7 @@ index 000000000..8ba9ed66d +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { -+ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); @@ -8513,19 +8964,30 @@ index 000000000..8ba9ed66d +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); ++ + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); ++ + CallingConvention* cc = NULL; -+ BasicTypeList signature(1); -+ signature.append(T_DOUBLE); -+ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } -+ cc = frame_map()->c_calling_convention(&signature); -+ value.load_item_force(cc->at(0)); ++ + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); ++ + value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); + } ++ + switch (x->id()) { + case vmIntrinsics::_dexp: + if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } @@ -8913,20 +9375,16 @@ index 000000000..8ba9ed66d + 
+void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { -+ if (!UseBarriersForVolatile) { -+ __ membar(); -+ } -+ + __ volatile_load_mem_reg(address, result, info); +} diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp new file mode 100644 -index 000000000..00e33e882 +index 0000000000..0317ed9003 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -8976,18 +9434,18 @@ index 000000000..00e33e882 +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); -+ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, -+ "wrong type for addresses"); ++ assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || ++ base()->type() == T_METADATA, "wrong type for addresses"); +} +#endif // PRODUCT diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp new file mode 100644 -index 000000000..60dcdc0e1 +index 0000000000..78a61128bd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp @@ -0,0 +1,33 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -9021,14 +9479,14 @@ index 000000000..60dcdc0e1 +} diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp new file mode 100644 -index 000000000..f0aa08a39 +index 0000000000..d7ca7b0fd0 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp -@@ -0,0 +1,85 @@ +@@ -0,0 +1,83 @@ +/* -+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9063,7 +9521,6 @@ index 000000000..f0aa08a39 + return 1; +} + -+ +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} @@ -9085,8 +9542,8 @@ index 000000000..f0aa08a39 + return false; +} + -+ +inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // No special case behaviours yet +} + + @@ -9099,8 +9556,8 @@ index 000000000..f0aa08a39 + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; -+ } else if (cur->type() == T_INT || cur->type() == T_LONG || -+ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || ++ cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; @@ -9108,18 +9565,17 @@ index 000000000..f0aa08a39 + return false; +} + -+ +#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp new file mode 100644 -index 000000000..370ec45c6 +index 0000000000..99d981f97f --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp -@@ -0,0 +1,441 @@ +@@ -0,0 +1,443 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9143,13 +9599,14 @@ index 000000000..370ec45c6 + */ + +#include "precompiled.hpp" ++#include "c1/c1_LIR.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" @@ -9167,7 +9624,7 @@ index 000000000..370ec45c6 + } +} + -+int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord - 1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); @@ -9180,8 +9637,8 @@ index 000000000..370ec45c6 + sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { -+ assert(tmp != noreg, "should have tmp register at this point"); -+ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } @@ -9212,7 +9669,7 @@ index 000000000..370ec45c6 + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); -+ mv(t0, aligned_mask - os::vm_page_size()); ++ li(t0, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) @@ -9222,7 +9679,7 @@ index 000000000..370ec45c6 + bind(done); + if (PrintBiasedLockingStatistics) { + la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); -+ incrementw(Address(t1, 0)); ++ add_memory_int32(Address(t1, 0), 1); + } + return null_check_offset; +} @@ -9298,7 +9755,7 @@ index 000000000..370ec45c6 +} + +// preserves obj, destroys len_in_bytes -+void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + @@ -9310,7 +9767,7 @@ index 000000000..370ec45c6 + if (hdr_size_in_bytes) { + add(obj, obj, hdr_size_in_bytes); + } -+ zero_memory(obj, len_in_bytes, tmp1); ++ zero_memory(obj, len_in_bytes, tmp); + if (hdr_size_in_bytes) { + sub(obj, obj, hdr_size_in_bytes); + } @@ -9435,19 +9892,20 @@ index 000000000..370ec45c6 + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + // If we have to make this method not-entrant we'll overwrite its -+ // first instruction with a jump. For this action to be legal we ++ // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a J, JAL or NOP. + // Make it a NOP. 
+ nop(); ++ + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. -+ // Note that we do this before doing an enter(). ++ // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); -+ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multipler for wordSize ++ MacroAssembler::build_frame(framesize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { -+ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiper for wordSize ++ MacroAssembler::remove_frame(framesize); +} + + @@ -9539,9 +9997,9 @@ index 000000000..370ec45c6 + if (type == T_OBJECT || type == T_ARRAY) { + assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); + if (cmpFlag == lir_cond_equal) { -+ oop_equal(op1, op2, label, is_far); ++ beq(op1, op2, label, is_far); + } else { -+ oop_nequal(op1, op2, label, is_far); ++ bne(op1, op2, label, is_far); + } + } else { + assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), @@ -9559,14 +10017,14 @@ index 000000000..370ec45c6 +} diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp new file mode 100644 -index 000000000..5d0cefe89 +index 0000000000..1950cee5dd --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp @@ -0,0 +1,121 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9614,7 +10072,7 @@ index 000000000..5d0cefe89 + ); + + void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); -+ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, @@ -9624,9 +10082,9 @@ index 000000000..5d0cefe89 + // hdr : must be x10, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved -+ // tmp : temporary register, contents destroyed ++ // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information -+ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed @@ -9686,14 +10144,14 @@ index 000000000..5d0cefe89 +#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp new file mode 100644 -index 000000000..f06e7b51c +index 0000000000..329df2e1ca --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp -@@ -0,0 +1,1206 @@ +@@ -0,0 +1,1210 @@ +/* + * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -9723,9 +10181,11 @@ index 000000000..f06e7b51c +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" @@ -9733,6 +10193,7 @@ index 000000000..f06e7b51c +#include "register_riscv.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_riscv.inline.hpp" @@ -9740,11 +10201,11 @@ index 000000000..f06e7b51c + +// Implementation of StubAssembler + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, int args_size) { + // setup registers -+ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ assert(!(oop_result->is_valid() || metadata_result->is_valid()) || oop_result != metadata_result, + "registers must be different"); -+ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(oop_result != xthread && metadata_result != xthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + @@ -9780,7 +10241,7 @@ index 000000000..f06e7b51c + beqz(t0, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared -+ if (oop_result1->is_valid()) { ++ if (oop_result->is_valid()) { + sd(zr, Address(xthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { @@ -9797,8 +10258,8 @@ index 000000000..f06e7b51c + bind(L); + } + // get oop results if there are any and reset the values in the thread -+ if (oop_result1->is_valid()) { -+ get_vm_result(oop_result1, xthread); ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, xthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, xthread); @@ -9806,12 +10267,12 @@ index 000000000..f06e7b51c + return call_offset; +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1) { + mv(c_rarg1, arg1); -+ return call_RT(oop_result1, metadata_result, entry, 1); ++ return call_RT(oop_result, metadata_result, entry, 1); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++int StubAssembler::call_RT(Register oop_result, Register metadata_result, address entry, Register arg1, Register arg2) { + const int arg_num = 2; + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { @@ -9826,10 +10287,10 @@ index 000000000..f06e7b51c + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + -+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++int StubAssembler::call_RT(Register 
oop_result, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + const int arg_num = 3; + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || @@ -9838,21 +10299,21 @@ index 000000000..f06e7b51c + const int arg1_sp_offset = 0; + const int arg2_sp_offset = 1; + const int arg3_sp_offset = 2; -+ addi(sp, sp, -(arg_num * wordSize)); -+ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); -+ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ addi(sp, sp, -(arg_num + 1) * wordSize); + sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); + + ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); + ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); + ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); -+ addi(sp, sp, arg_num * wordSize); ++ addi(sp, sp, (arg_num + 1) * wordSize); + } else { + mv(c_rarg1, arg1); + mv(c_rarg2, arg2); + mv(c_rarg3, arg3); + } -+ return call_RT(oop_result1, metadata_result, entry, arg_num); ++ return call_RT(oop_result, metadata_result, entry, arg_num); +} + +// Implementation of StubFrame @@ -9919,7 +10380,7 @@ index 000000000..f06e7b51c +}; + +// Save off registers which might be killed by calls into the runtime. -+// Tries to smart of about FP registers. In particular we separate ++// Tries to smart of about FPU registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. @@ -9936,11 +10397,12 @@ index 000000000..f06e7b51c + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + assert_cond(oop_map != NULL); + -+ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // caller save registers only, see FrameMap::initialize + // in c1_FrameMap_riscv.cpp for detail. -+ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, -+ x13, x14, x15, x16, x17, -+ x28, x29, x30, x31}; ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = { ++ x7, x10, x11, x12, x13, x14, x15, x16, x17, x28, x29, x30, x31 ++ }; ++ + for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { + Register r = caller_save_cpu_regs[i]; + int sp_offset = cpu_reg_save_offsets[r->encoding()]; @@ -10165,7 +10627,7 @@ index 000000000..f06e7b51c + __ leave(); + __ ret(); // jump to exception handler + break; -+ default: ShouldNotReachHere(); ++ default: ShouldNotReachHere(); + } + + return oop_maps; @@ -10269,14 +10731,13 @@ index 000000000..f06e7b51c + __ reset_last_Java_frame(true); + + // check for pending exceptions -+ { -+ Label L; ++ { Label L; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, L); + // exception pending => remove activation and forward to exception handler + + { Label L1; -+ __ bnez(x10, L1); // have we deoptimized? ++ __ bnez(x10, L1); // have we deoptimized? 
+ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ bind(L1); + } @@ -10295,7 +10756,7 @@ index 000000000..f06e7b51c + __ ld(x13, Address(fp, wordSize)); + +#ifdef ASSERT -+ // check that fields in JavaThread for exception oop and issuing pc are empty ++ // Check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld(t0, Address(xthread, Thread::pending_exception_offset())); + __ beqz(t0, oop_empty); @@ -10334,6 +10795,7 @@ index 000000000..f06e7b51c + + // Will reexecute. Proper return address is already on the stack we just restore + // registers, pop all of our frame but the return address and jump to the deopt blob ++ + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); @@ -10693,7 +11155,7 @@ index 000000000..f06e7b51c + __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); + + // fallthrough on success: -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); + __ ret(); @@ -10883,7 +11345,7 @@ index 000000000..f06e7b51c + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); -+ __ mv(x10, (int)id); ++ __ li(x10, (int) id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); + __ should_not_reach_here(); + } @@ -10898,14 +11360,13 @@ index 000000000..f06e7b51c +const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp new file mode 100644 -index 000000000..974c8fe76 +index 0000000000..9316d4be02 --- /dev/null +++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp -@@ -0,0 +1,72 @@ +@@ -0,0 +1,71 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -10962,7 +11423,7 @@ index 000000000..974c8fe76 +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); -+define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); @@ -10971,19 +11432,18 @@ index 000000000..974c8fe76 +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); -+define_pd_global(bool, TwoOperandLIRForm, false ); ++define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp new file mode 100644 -index 000000000..bf4efa629 +index 0000000000..3da1f1c6d8 --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp -@@ -0,0 +1,91 @@ +@@ -0,0 +1,90 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11029,7 +11489,7 @@ index 000000000..bf4efa629 + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 0); -+define_pd_global(intx, FLOATPRESSURE, 64); ++define_pd_global(intx, FLOATPRESSURE, 32); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 24); @@ -11059,7 +11519,7 @@ index 000000000..bf4efa629 +define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(intx, ProfiledCodeHeapSize, 22*M); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); -+define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +// Heap related flags @@ -11068,18 +11528,18 @@ index 000000000..bf4efa629 +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + -+define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. + +#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp new file mode 100644 -index 000000000..3cb4a4995 +index 0000000000..cdbd69807b --- /dev/null +++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -11117,12 +11577,12 @@ index 000000000..3cb4a4995 +} diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp new file mode 100644 -index 000000000..881900892 +index 0000000000..14a68b4502 --- /dev/null +++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp @@ -0,0 +1,36 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -11159,14 +11619,14 @@ index 000000000..881900892 +#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp new file mode 100644 -index 000000000..0354a93a0 +index 0000000000..a4de342a93 --- /dev/null +++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp -@@ -0,0 +1,154 @@ +@@ -0,0 +1,149 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. 
All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11239,8 +11699,8 @@ index 000000000..0354a93a0 +} + +int CompiledStaticCall::to_trampoline_stub_size() { -+ // Somewhat pessimistically, we count four instructions here (although -+ // there are only three) because we sometimes emit an alignment nop. ++ // Somewhat pessimistically, we count 4 instructions here (although ++ // there are only 3) because we sometimes emit an alignment nop. + // Trampoline stubs are always word aligned. + return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; +} @@ -11262,8 +11722,7 @@ index 000000000..0354a93a0 + } + + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); +#ifndef PRODUCT + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); + @@ -11288,8 +11747,7 @@ index 000000000..0354a93a0 + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + method_holder->set_data(0); +} + @@ -11300,16 +11758,13 @@ index 000000000..0354a93a0 +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); -+ if (os::is_MP()) { -+ _call->verify_alignment(); -+ } ++ _call->verify_alignment(); + + // Verify stub. + address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. -+ NativeMovConstReg* method_holder -+ = nativeMovConstReg_at(stub); ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. @@ -11319,14 +11774,14 @@ index 000000000..0354a93a0 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp new file mode 100644 -index 000000000..011e965ad +index 0000000000..05da242e35 --- /dev/null +++ b/src/hotspot/cpu/riscv/copy_riscv.hpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,59 @@ +/* -+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -11357,7 +11812,6 @@ index 000000000..011e965ad +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + -+ +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; @@ -11385,7 +11839,7 @@ index 000000000..011e965ad +#endif // CPU_RISCV_COPY_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp new file mode 100644 -index 000000000..31cee7103 +index 0000000000..e9ff307b64 --- /dev/null +++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp @@ -0,0 +1,32 @@ @@ -11423,14 +11877,14 @@ index 000000000..31cee7103 +#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp new file mode 100644 -index 000000000..e97b89327 +index 0000000000..06bca5298c --- /dev/null +++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp -@@ -0,0 +1,37 @@ +@@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11444,7 +11898,8 @@ index 000000000..e97b89327 + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any @@ -11455,24 +11910,24 @@ index 000000000..e97b89327 +#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP +#define CPU_RISCV_DISASSEMBLER_RISCV_HPP + -+ static int pd_instruction_alignment() { -+ return 1; -+ } ++static int pd_instruction_alignment() { ++ return 1; ++} + -+ static const char* pd_cpu_opts() { -+ return ""; -+ } ++static const char* pd_cpu_opts() { ++ return ""; ++} + +#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp new file mode 100644 -index 000000000..be6f1a67f +index 0000000000..d4fcbdcbbd --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.cpp -@@ -0,0 +1,683 @@ +@@ -0,0 +1,694 @@ +/* -+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -11497,9 +11952,10 @@ index 000000000..be6f1a67f + */ + +#include "precompiled.hpp" ++#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" -+#include "oops/markOop.hpp" ++#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" @@ -11507,7 +11963,7 @@ index 000000000..be6f1a67f +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" -+#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" @@ -11526,19 +11982,19 @@ index 000000000..be6f1a67f +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { -+ address addr_sp = (address)_sp; -+ address addr_fp = (address)_fp; ++ address sp = (address)_sp; ++ address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? -+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; -+ assert_cond(thread != NULL); ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) -+ bool sp_safe = (addr_sp < thread->stack_base()) && -+ (addr_sp >= thread->stack_base() - usable_stack_size); ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ + + if (!sp_safe) { + return false; @@ -11565,8 +12021,7 @@ index 000000000..be6f1a67f + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 -+ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && -+ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + @@ -11630,9 +12085,10 @@ index 000000000..be6f1a67f + if ((address)sender_sp >= thread->stack_base()) { + return false; + } ++ + sender_unextended_sp = sender_sp; -+ sender_pc = (address) *(sender_sp + frame::return_addr_offset); -+ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); ++ sender_pc = (address) *(sender_sp - 1); ++ saved_fp = (intptr_t*) *(sender_sp - 2); + } + + @@ -11642,6 +12098,7 @@ index 000000000..be6f1a67f + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp + // is really a frame pointer. 
++ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { @@ -11996,13 +12453,12 @@ index 000000000..be6f1a67f + // do some validation of frame elements + + // first the method -+ + Method* m = *interpreter_frame_method_addr(); -+ + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) { + return false; + } ++ + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point @@ -12013,7 +12469,7 @@ index 000000000..be6f1a67f + } + + // validate bci/bcx -+ address bcp = interpreter_frame_bcp(); ++ address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } @@ -12023,12 +12479,22 @@ index 000000000..be6f1a67f + if (MetaspaceObj::is_valid(cp) == false) { + return false; + } ++ + // validate locals -+ address locals = (address) *interpreter_frame_locals_addr(); ++ address locals = (address) *interpreter_frame_locals_addr(); ++ if (locals > thread->stack_base()) { ++ return false; ++ } + -+ if (locals > thread->stack_base() || locals < (address) fp()) { ++ if (m->max_locals() > 0 && locals < (address) fp()) { ++ // fp in interpreter frame on RISC-V is higher than that on AArch64, ++ // pointing to sender_sp and sender_sp-2 relatively. ++ // On RISC-V, if max_locals is 0, the 'locals' pointer may be below fp, ++ // pointing to sender_sp-1 (with one padding slot). ++ // So we verify the 'locals' pointer only if max_locals > 0. + return false; + } ++ + // We'd have to be pretty unlucky to be mislead at this point + return true; +} @@ -12155,14 +12621,13 @@ index 000000000..be6f1a67f +} diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp new file mode 100644 -index 000000000..7acabcbba +index 0000000000..18e021dcb9 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.hpp -@@ -0,0 +1,200 @@ +@@ -0,0 +1,199 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12212,7 +12677,7 @@ index 000000000..7acabcbba +// [padding ] + +// [methodData ] = mdp() mdx_offset -+// [methodOop ] = method() method_offset ++// [Method ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset @@ -12354,19 +12819,19 @@ index 000000000..7acabcbba + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support -+ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++ void interpreter_frame_set_last_sp(intptr_t* last_sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_RISCV_FRAME_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp new file mode 100644 -index 000000000..5bc6b430c +index 0000000000..abd5bda7e4 --- /dev/null +++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp -@@ -0,0 +1,257 @@ +@@ -0,0 +1,245 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -12504,11 +12969,6 @@ index 000000000..5bc6b430c +// frame. +inline intptr_t* frame::id(void) const { return unextended_sp(); } + -+// Relationals on frames based -+ -+// Return true if the frame is younger (more recent activation) than the frame represented by id -+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); -+ return this->id() < id ; } +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } @@ -12604,33 +13064,26 @@ index 000000000..5bc6b430c +// Compiled frames +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ return (*result_adr); -+ } else { -+ ShouldNotReachHere(); -+ return NULL; -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(x10->as_VMReg()); -+ if(result_adr != NULL) { -+ *result_adr = obj; -+ } else { -+ ShouldNotReachHere(); -+ } ++ guarantee(result_adr != NULL, "bad register save location"); ++ *result_adr = obj; +} + +#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..6f778956d +index 0000000000..e191cbcee2 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp -@@ -0,0 +1,479 @@ +@@ -0,0 +1,481 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12675,6 +13128,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count, RegSet saved_regs) { ++ assert_cond(masm != NULL); + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + if (!dest_uninitialized) { + Label done; @@ -12717,6 +13171,7 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + __ push_reg(saved_regs, sp); + assert_different_registers(start, count, tmp); + assert_different_registers(c_rarg0, count); @@ -12736,7 +13191,8 @@ index 000000000..6f778956d + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -+ ++ ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + + Label done; @@ -12784,21 +13240,15 @@ index 000000000..6f778956d + __ j(done); + + __ bind(runtime); -+ // save the live input values -+ RegSet saved = RegSet::of(pre_val); -+ if (tosca_live) { saved += RegSet::of(x10); } -+ if (obj != noreg) { saved += RegSet::of(obj); } -+ -+ __ push_reg(saved, sp); + ++ __ push_call_clobbered_registers(); + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } -+ -+ __ pop_reg(saved, sp); ++ __ pop_call_clobbered_registers(); + + __ bind(done); + @@ -12810,6 +13260,7 @@ index 000000000..6f778956d + Register thread, + Register tmp, + Register tmp2) { ++ assert_cond(masm != NULL); + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp, tmp2, + t0); @@ -12839,6 +13290,7 @@ index 000000000..6f778956d + + // storing region crossing non-NULL, is card already dirty? 
+ ++ ExternalAddress cardtable((address) ct->byte_map_base()); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + @@ -12885,7 +13337,8 @@ index 000000000..6f778956d + +void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { -+ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ assert_cond(masm != NULL); ++ bool on_oop = is_reference_type(type); + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; @@ -12907,16 +13360,19 @@ index 000000000..6f778956d +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + // flatten object address if needed + if (dst.offset() == 0) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); ++ } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + g1_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12924,7 +13380,7 @@ index 000000000..6f778956d + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; @@ -12932,9 +13388,9 @@ index 000000000..6f778956d + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + g1_write_barrier_post(masm, -+ tmp3 /* store_adr */, ++ x13 /* store_adr */, + new_val /* new_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -12961,8 +13417,7 @@ index 000000000..6f778956d + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), -+ false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -13109,13 +13564,13 @@ index 000000000..6f778956d +#endif // COMPILER1 diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..7f85e002d +index 0000000000..37bc183f39 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13175,7 +13630,7 @@ index 000000000..7f85e002d + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 @@ -13191,15 +13646,52 @@ index 000000000..7f85e002d +}; + +#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +new file mode 100644 +index 0000000000..8735fd014f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1Globals_riscv.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP ++ ++const size_t G1MergeHeapRootsPrefetchCacheSize = 16; ++ ++#endif // CPU_RISCV_GC_G1_G1GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..203b82744 +index 0000000000..2b556b95d7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp -@@ -0,0 +1,226 @@ +@@ -0,0 +1,231 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13223,15 +13715,23 @@ index 000000000..203b82744 + */ + +#include "precompiled.hpp" ++#include "classfile/classLoaderData.hpp" ++#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "memory/universe.hpp" +#include "runtime/jniHandles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { ++ assert_cond(masm != NULL); ++ + // RA is live. It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; @@ -13271,7 +13771,8 @@ index 000000000..203b82744 +} + +void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ assert_cond(masm != NULL); + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { @@ -13311,16 +13812,9 @@ index 000000000..203b82744 + +} + -+void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { -+ __ beq(obj1, obj2, equal, is_far); -+} -+ -+void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { -+ __ bne(obj1, obj2, nequal, is_far); -+} -+ +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { ++ assert_cond(masm != NULL); + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ andi(obj, obj, ~JNIHandles::weak_tag_mask); @@ -13335,6 +13829,7 @@ index 000000000..203b82744 + Register tmp2, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp2); + assert_different_registers(obj, var_size_in_bytes); + Register end = tmp2; @@ -13364,6 +13859,7 @@ index 000000000..203b82744 + Register tmp1, + Label& slow_case, + bool is_far) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, var_size_in_bytes, tmp1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ j(slow_case); @@ -13404,7 +13900,7 @@ index 000000000..203b82744 + // If heap_top hasn't been changed by some other thread, update it. 
+ __ sc_d(t1, end, t0, Assembler::rl); + __ bnez(t1, retry); -+ ++ + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); + } +} @@ -13413,6 +13909,7 @@ index 000000000..203b82744 + Register var_size_in_bytes, + int con_size_in_bytes, + Register tmp1) { ++ assert_cond(masm != NULL); + assert(tmp1->is_valid(), "need temp reg"); + + __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); @@ -13425,13 +13922,13 @@ index 000000000..203b82744 +} diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..964fc28be +index 0000000000..984d94f4c3 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp -@@ -0,0 +1,75 @@ +@@ -0,0 +1,76 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13458,6 +13955,7 @@ index 000000000..964fc28be +#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP + +#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + @@ -13475,9 +13973,8 @@ index 000000000..964fc28be + virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false); -+ virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false); ++ Address dst, Register val, Register tmp1, Register tmp2); ++ + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + @@ -13488,7 +13985,7 @@ index 000000000..964fc28be + Register tmp1, // temp register + Register tmp2, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 ++ bool is_far = false + ); + + void eden_allocate(MacroAssembler* masm, @@ -13497,22 +13994,23 @@ index 000000000..964fc28be + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register + Label& slow_case, // continuation point if fast allocation fails -+ bool is_far = false // the distance of label slowcase could be more than 12KiB in C1 ++ bool is_far = false + ); + virtual void barrier_stubs_init() {} ++ + virtual ~BarrierSetAssembler() {} +}; + +#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1720488fb +index 0000000000..81d47d61d4 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,125 @@ +/* -+ * 
Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13541,12 +14039,14 @@ index 000000000..1720488fb +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) { ++ assert_cond(masm != NULL); + assert_different_registers(obj, tmp); + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind"); @@ -13579,8 +14079,10 @@ index 000000000..1720488fb + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs) { ++ assert_cond(masm != NULL); + assert_different_registers(start, tmp); + assert_different_registers(count, tmp); ++ + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); @@ -13612,33 +14114,34 @@ index 000000000..1720488fb +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || dst.offset() == 0) { -+ store_check(masm, dst.base(), tmp3); ++ store_check(masm, dst.base(), x13); + } else { -+ __ la(tmp3, dst); -+ store_check(masm, tmp3, t0); ++ assert_cond(masm != NULL); ++ __ la(x13, dst); ++ store_check(masm, x13, t0); + } + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..a5b3f9fe8 +index 0000000000..686fe8fa47 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13674,20 +14177,19 @@ index 000000000..a5b3f9fe8 + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..b82275297 +index 0000000000..7aa2015f9e --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,54 @@ +@@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13718,6 +14220,7 @@ index 000000000..b82275297 + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register src, Register dst, Register count, RegSet saved_regs) { ++ + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); + } @@ -13732,22 +14235,22 @@ index 000000000..b82275297 +} + +void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { -+ if (type == T_OBJECT || type == T_ARRAY) { -+ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (is_reference_type(type)) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..df206cc87 +index 0000000000..00419c3163 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13788,7 +14291,7 @@ index 000000000..df206cc87 + Register start, Register count, Register tmp, RegSet saved_regs) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, @@ -13796,18 +14299,18 @@ index 000000000..df206cc87 + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register count, Register tmp, RegSet saved_regs); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp new file mode 100644 -index 000000000..6657f1be0 +index 0000000000..d19f5b859c --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp -@@ -0,0 +1,124 @@ +@@ -0,0 +1,117 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -13834,6 +14337,7 @@ index 000000000..6657f1be0 +#include "precompiled.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shared/gc_globals.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" +#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" @@ -13859,14 +14363,6 @@ index 000000000..6657f1be0 + + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, + /* release */ Assembler::rl, /* is_cae */ false, result); -+ if (UseBarriersForVolatile) { -+ // The membar here is necessary to prevent reordering between the -+ // release store in the CAS above and a subsequent volatile load. -+ // However for !UseBarriersForVolatile, C1 inserts a full barrier before -+ // volatile loads which means we don't need an additional barrier -+ // here (see LIRGenerator::volatile_field_load()). -+ __ membar(MacroAssembler::AnyAny); -+ } +} + +#undef __ @@ -13932,13 +14428,13 @@ index 000000000..6657f1be0 +} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp new file mode 100644 -index 000000000..1bc01e454 +index 0000000000..b8534c52e7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp -@@ -0,0 +1,743 @@ +@@ -0,0 +1,715 @@ +/* + * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -13988,8 +14484,8 @@ index 000000000..1bc01e454 + Register src, Register dst, Register count, RegSet saved_regs) { + if (is_oop) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; -+ if ((ShenandoahSATBBarrier && !dest_uninitialized) || -+ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ + Label done; + + // Avoid calling runtime if count == 0 @@ -14056,10 +14552,10 @@ index 000000000..1bc01e454 + Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); + + // Is marking active? -+ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ if (in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp, in_progress); + } else { -+ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ assert(in_bytes(ShenandoahSATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp, in_progress); + } + __ beqz(tmp, done); @@ -14139,7 +14635,7 @@ index 000000000..1bc01e454 + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst -+ RegSet savedRegs = RegSet::of(t2); ++ RegSet saved_regs = RegSet::of(t2); + bool borrow_reg = (tmp == noreg); + if (borrow_reg) { + // No free registers available. Make one useful. @@ -14147,11 +14643,11 @@ index 000000000..1bc01e454 + if (tmp == dst) { + tmp = t1; + } -+ savedRegs += RegSet::of(tmp); ++ saved_regs += RegSet::of(tmp); + } + + assert_different_registers(tmp, dst, t2); -+ __ push_reg(savedRegs, sp); ++ __ push_reg(saved_regs, sp); + + Label done; + __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); @@ -14162,14 +14658,15 @@ index 000000000..1bc01e454 + __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 + __ bind(done); + -+ __ pop_reg(savedRegs, sp); ++ __ pop_reg(saved_regs, sp); +} + +void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, -+ Register dst, Address load_addr) { ++ Register dst, ++ Address load_addr) { + assert(ShenandoahLoadRefBarrier, "Should be enabled"); + assert(dst != t1 && load_addr.base() != t1, "need t1"); -+ assert_different_registers(load_addr.base(), t1, t2); ++ assert_different_registers(load_addr.base(), t0, t1); + + Label done; + __ enter(); @@ -14188,15 +14685,15 @@ index 000000000..1bc01e454 + } + + // Save x10 and x11, unless it is an output register -+ RegSet to_save = RegSet::of(x10, x11) - result_dst; -+ __ push_reg(to_save, sp); ++ RegSet saved_regs = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(saved_regs, sp); + __ la(x11, load_addr); + __ mv(x10, dst); + + __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); + + __ mv(result_dst, x10); -+ __ pop_reg(to_save, sp); ++ __ pop_reg(saved_regs, sp); + + __ bind(done); + __ leave(); @@ -14205,7 +14702,9 @@ index 000000000..1bc01e454 +void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { + if (ShenandoahIUBarrier) { + __ push_call_clobbered_registers(); ++ + satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ + __ pop_call_clobbered_registers(); + } +} @@ -14249,16 +14748,14 @@ index 000000000..1bc01e454 + + // 2: load a reference from src location and apply LRB if needed + if 
(ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { -+ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); -+ bool ist5 = (dst == src.base()); -+ if (ist5) { -+ __ push_reg(RegSet::of(x30), sp); -+ } + Register result_dst = dst; + + // Preserve src location for LRB ++ RegSet saved_regs; + if (dst == src.base()) { -+ dst = x30; ++ dst = (src.base() == x28) ? x29 : x28; ++ saved_regs = RegSet::of(dst); ++ __ push_reg(saved_regs, sp); + } + assert_different_registers(dst, src.base()); + @@ -14271,8 +14768,8 @@ index 000000000..1bc01e454 + dst = result_dst; + } + -+ if (ist5) { -+ __ pop_reg(RegSet::of(x30), sp); ++ if (saved_regs.bits() != 0) { ++ __ pop_reg(saved_regs, sp); + } + } else { + BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); @@ -14295,24 +14792,24 @@ index 000000000..1bc01e454 +} + +void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ Address dst, Register val, Register tmp1, Register tmp2) { + bool on_oop = is_reference_type(type); + if (!on_oop) { -+ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); + return; + } + + // flatten object address if needed + if (dst.offset() == 0) { -+ if (dst.base() != tmp3) { -+ __ mv(tmp3, dst.base()); ++ if (dst.base() != x13) { ++ __ mv(x13, dst.base()); + } + } else { -+ __ la(tmp3, dst); ++ __ la(x13, dst); + } + + shenandoah_write_barrier_pre(masm, -+ tmp3 /* obj */, ++ x13 /* obj */, + tmp2 /* pre_val */, + xthread /* thread */, + tmp1 /* tmp */, @@ -14320,7 +14817,7 @@ index 000000000..1bc01e454 + false /* expand_call */); + + if (val == noreg) { -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), noreg, noreg, noreg); + } else { + iu_barrier(masm, val, tmp1); + // G1 barrier needs uncompressed oop for region cross check. @@ -14329,7 +14826,7 @@ index 000000000..1bc01e454 + new_val = t1; + __ mv(new_val, val); + } -+ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(x13, 0), val, noreg, noreg); + } +} + @@ -14370,39 +14867,10 @@ index 000000000..1bc01e454 +// from-space, or it refers to the to-space version of an object that +// is being evacuated out of from-space. +// -+// By default, this operation implements sequential consistency and the -+// value held in the result register following execution of the -+// generated code sequence is 0 to indicate failure of CAS, non-zero -+// to indicate success. Arguments support variations on this theme: -+// -+// acquire: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If acquire, successful -+// execution has the side effect of assuring that memory -+// values updated by other threads and "released" will be -+// visible to any read operations perfomed by this thread -+// which follow this operation in program order. This is a -+// special optimization that should not be enabled by default. 
-+// release: Allow relaxation of the memory ordering on CAS from -+// sequential consistency. This can be useful when -+// sequential consistency is not required, such as when -+// another sequentially consistent operation is already -+// present in the execution stream. If release, successful -+// completion of this operation has the side effect of -+// assuring that all writes to memory performed by this -+// thread that precede this operation in program order are -+// visible to all other threads that subsequently "acquire" -+// before reading the respective memory values. This is a -+// special optimization that should not be enabled by default. -+// is_cae: This turns CAS (compare and swap) into CAE (compare and -+// exchange). This HotSpot convention is that CAE makes -+// available to the caller the "failure witness", which is -+// the value that was stored in memory which did not match -+// the expected value. If is_cae, the result is the value -+// most recently fetched from addr rather than a boolean -+// success indicator. ++// By default the value held in the result register following execution ++// of the generated code sequence is 0 to indicate failure of CAS, ++// non-zero to indicate success. If is_cae, the result is the value most ++// recently fetched from addr rather than a boolean success indicator. +// +// Clobbers t0, t1 +void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, @@ -14452,7 +14920,7 @@ index 000000000..1bc01e454 + if (is_cae) { + __ mv(result, expected); + } else { -+ __ mv(result, 1); ++ __ addi(result, zr, 1); + } + __ j(done); + @@ -14485,8 +14953,7 @@ index 000000000..1bc01e454 + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { -+ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), -+ stub->info(), false /* wide */, false /* unaligned */); ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /* wide */, false /*unaligned*/); + } + __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); + ce->store_parameter(stub->pre_val()->as_register(), 0); @@ -14598,6 +15065,7 @@ index 000000000..1bc01e454 + __ push_call_clobbered_registers(); + __ load_parameter(0, x10); + __ load_parameter(1, x11); ++ + if (UseCompressedOops) { + __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { @@ -14681,13 +15149,13 @@ index 000000000..1bc01e454 +} diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp new file mode 100644 -index 000000000..84bc55706 +index 0000000000..5d75035e9d --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp -@@ -0,0 +1,92 @@ +@@ -0,0 +1,97 @@ +/* -+ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14715,6 +15183,7 @@ index 000000000..84bc55706 + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" +#ifdef COMPILER1 +class LIR_Assembler; +class ShenandoahPreBarrierStub; @@ -14724,31 +15193,6 @@ index 000000000..84bc55706 +class StubCodeGenerator; + +class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { -+public: -+ static address shenandoah_lrb(); -+ -+ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); -+ -+#ifdef COMPILER1 -+ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); -+ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); -+ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); -+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); -+#endif -+ -+ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, -+ Register src, Register dst, Register count, RegSet saved_regs); -+ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Register dst, Address src, Register tmp1, Register tmp_thread); -+ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, -+ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); -+ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, -+ Register obj, Register tmp, Label& slowpath); -+ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, -+ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); -+ -+ virtual void barrier_stubs_init(); -+ +private: + + static address _shenandoah_lrb; @@ -14774,15 +15218,44 @@ index 000000000..84bc55706 + void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); + + address generate_shenandoah_lrb(StubCodeGenerator* cgen); ++ ++public: ++ ++ static address shenandoah_lrb(); ++ ++ void iu_barrier(MacroAssembler* masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); +}; + +#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad 
b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad new file mode 100644 -index 000000000..6e310697d +index 0000000000..bab407a8b7 --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad -@@ -0,0 +1,188 @@ +@@ -0,0 +1,197 @@ +// +// Copyright (c) 2018, Red Hat, Inc. All rights reserved. +// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. @@ -14856,7 +15329,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14878,7 +15351,7 @@ index 000000000..6e310697d +%} + +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ predicate(needs_acquiring_load_reserved(n)); + match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + @@ -14903,9 +15376,11 @@ index 000000000..6e310697d + match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); + ins_cost(10 * DEFAULT_COST); + effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ + format %{ + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14913,6 +15388,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14924,6 +15400,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14931,6 +15408,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + true /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14943,6 +15421,7 @@ index 000000000..6e310697d + "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" + "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. @@ -14951,6 +15430,7 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} + @@ -14962,6 +15442,7 @@ index 000000000..6e310697d + format %{ + "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" + %} ++ + ins_encode %{ + Register tmp = $tmp$$Register; + __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
@@ -14969,18 +15450,19 @@ index 000000000..6e310697d + Assembler::relaxed /* acquire */, Assembler::rl /* release */, + false /* is_cae */, $res$$Register); + %} ++ + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp new file mode 100644 -index 000000000..96068e637 +index 0000000000..d6ce8da07b --- /dev/null +++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp -@@ -0,0 +1,44 @@ +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15012,6 +15494,8 @@ index 000000000..96068e637 +// 32-bit integer argument values are extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + ++// To be safe, we deoptimize when we come across an access that needs ++// patching. This is similar to what is done on aarch64. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORTS_NATIVE_CX8 @@ -15023,14 +15507,13 @@ index 000000000..96068e637 +#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp new file mode 100644 -index 000000000..b46661a8f +index 0000000000..90db2f4460 --- /dev/null +++ b/src/hotspot/cpu/riscv/globals_riscv.hpp -@@ -0,0 +1,120 @@ +@@ -0,0 +1,111 @@ +/* -+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15123,12 +15606,6 @@ index 000000000..b46661a8f + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ -+ product(bool, UseBarriersForVolatile, false, \ -+ "Use memory barriers to implement volatile accesses") \ -+ product(bool, UseCRC32, false, \ -+ "Use CRC32 instructions for CRC32 computation") \ -+ product(bool, UseBlockZeroing, true, \ -+ "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ + "Minimum size in bytes when block zeroing will be used") \ + range(1, max_jint) \ @@ -15138,25 +15615,23 @@ index 000000000..b46661a8f + "Extend i for r and o for w in the pred/succ flags of fence") \ + product(bool, AvoidUnalignedAccesses, true, \ + "Avoid generating unaligned memory accesses") \ -+ product(intx, EagerArrayCopyThreshold, 128, \ -+ "Threshod of array length by bytes to " \ -+ "trigger the eager array copy") \ -+ range(0, 65535) \ + experimental(bool, UseRVV, false, "Use RVV instructions") \ + experimental(bool, UseZba, false, "Use Zba instructions") \ -+ experimental(bool, UseZbb, false, "Use Zbb instructions") ++ experimental(bool, UseZbb, false, "Use Zbb instructions") \ ++ experimental(bool, UseZbs, false, "Use Zbs instructions") \ ++ experimental(bool, UseRVC, false, "Use RVC instructions") + +#endif // CPU_RISCV_GLOBALS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp new file mode 100644 -index 000000000..980b2a81b +index 0000000000..cc93103dc5 --- /dev/null +++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15191,7 +15666,7 @@ index 000000000..980b2a81b + +int InlineCacheBuffer::ic_stub_code_size() { + // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) -+ // 5: auipc + ld + j + address(2 * instruction_size ) ++ // 5: auipc + ld + j + address(2 * instruction_size) + return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; +} + @@ -15234,13 +15709,14 @@ index 000000000..980b2a81b +} diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp new file mode 100644 -index 000000000..ed8022784 +index 0000000000..d615dcfb9e --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.cpp -@@ -0,0 +1,61 @@ +@@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15264,28 +15740,34 @@ index 000000000..ed8022784 + */ + +#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/java.hpp" +#include "runtime/icache.hpp" -+#include "macroAssembler_riscv.hpp" + +#define __ _masm-> + +static int icache_flush(address addr, int lines, int magic) { + // To make a store to instruction memory visible to all RISC-V harts, + // the writing hart has to execute a data FENCE before requesting that -+ // all remote RISC-V harts execute a FENCE.I -+ // -+ // No such-assurance is defined at the interface level of the builtin -+ // method, and so we should make sure it works. ++ // all remote RISC-V harts execute a FENCE.I. ++ ++ // We need to make sure stores happens before the I/D cache synchronization. + __asm__ volatile("fence rw, rw" : : : "memory"); -+ -+ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); ++ ++ RiscvFlushIcache::flush((uintptr_t)addr, ((uintptr_t)lines) << ICache::log2_line_size); ++ + return magic; +} + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ // Only riscv_flush_icache is supported as I-cache synchronization. ++ // We must make sure the VM can execute such without error. ++ if (!RiscvFlushIcache::test()) { ++ vm_exit_during_initialization("Unable to synchronize I-cache"); ++ } + + address start = (address)icache_flush; -+ + *flush_icache_stub = (ICache::flush_icache_stub_t)start; + + // ICache::invalidate_range() contains explicit condition that the first @@ -15301,12 +15783,12 @@ index 000000000..ed8022784 +#undef __ diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp new file mode 100644 -index 000000000..a503d3be3 +index 0000000000..5bf40ca820 --- /dev/null +++ b/src/hotspot/cpu/riscv/icache_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -15349,14 +15831,14 @@ index 000000000..a503d3be3 +#endif // CPU_RISCV_ICACHE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp new file mode 100644 -index 000000000..91deb0ae2 +index 0000000000..b50be7e726 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp -@@ -0,0 +1,1932 @@ +@@ -0,0 +1,1931 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -15388,7 +15870,6 @@ index 000000000..91deb0ae2 +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" -+#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" @@ -15400,7 +15881,6 @@ index 000000000..91deb0ae2 +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + -+ +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); @@ -15618,7 +16098,8 @@ index 000000000..91deb0ae2 + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // Convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset -+ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, ++ "else change next line"); + ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); @@ -15864,11 +16345,11 @@ index 000000000..91deb0ae2 + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ld(t1, Address(xthread, Thread::polling_page_offset())); -+ andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit())); ++ andi(t1, t1, SafepointMechanism::poll_bit()); + bnez(t1, safepoint); + } + if (table == Interpreter::dispatch_table(state)) { -+ mv(t1, Interpreter::distance_from_dispatch_table(state)); ++ li(t1, Interpreter::distance_from_dispatch_table(state)); + add(t1, Rs, t1); + shadd(t1, t1, xdispatch, t1, 3); + } else { @@ -16082,6 +16563,7 @@ index 000000000..91deb0ae2 + + // restore sender esp + mv(esp, t1); ++ + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be @@ -16160,7 +16642,7 @@ index 000000000..91deb0ae2 + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); -+ mv(t0, (int64_t)(7 - os::vm_page_size())); ++ li(t0, (int64_t)(7 - os::vm_page_size())); + andr(swap_reg, swap_reg, t0); + + // Save the test result, for recursive case, the result is zero @@ -16261,7 +16743,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; -+ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++ push_reg(0xc00, sp); // save x10, x11 + + // Test MDO to avoid the call if it is NULL. 
+ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); @@ -16274,7 +16756,7 @@ index 000000000..91deb0ae2 + add(x10, x11, x10); + sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); + bind(set_mdp); -+ pop_reg(RegSet::of(x10, x11), sp); ++ pop_reg(0xc00, sp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { @@ -16414,7 +16896,7 @@ index 000000000..91deb0ae2 +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); -+ addi(mdp_in, mdp_in, constant); ++ addi(mdp_in, mdp_in, (unsigned)constant); + sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); +} + @@ -17012,7 +17494,7 @@ index 000000000..91deb0ae2 + + ld(t0, mdo_addr); + beqz(t0, none); -+ mv(tmp, (u1)TypeEntries::null_seen); ++ li(tmp, (u1)TypeEntries::null_seen); + beq(t0, tmp, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the @@ -17047,10 +17529,10 @@ index 000000000..91deb0ae2 + + lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + if (is_virtual) { -+ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ li(tmp, (u1)DataLayout::virtual_call_type_data_tag); + bne(t0, tmp, profile_continue); + } else { -+ mv(tmp, (u1)DataLayout::call_type_data_tag); ++ li(tmp, (u1)DataLayout::call_type_data_tag); + bne(t0, tmp, profile_continue); + } + @@ -17080,7 +17562,7 @@ index 000000000..91deb0ae2 + mv(index, zr); // index < TypeProfileArgsLimit + bind(loop); + bgtz(index, profileReturnType); -+ mv(t0, (int)MethodData::profile_return()); ++ li(t0, (int)MethodData::profile_return()); + beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false + bind(profileReturnType); + // If return value type is profiled we may have no argument to profile @@ -17088,7 +17570,7 @@ index 000000000..91deb0ae2 + mv(t1, - TypeStackSlotEntries::per_arg_count()); + mul(t1, index, t1); + add(tmp, tmp, t1); -+ mv(t1, TypeStackSlotEntries::per_arg_count()); ++ li(t1, TypeStackSlotEntries::per_arg_count()); + add(t0, mdp, off_to_args); + blt(tmp, t1, done); + @@ -17099,8 +17581,8 @@ index 000000000..91deb0ae2 + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list -+ mv(t0, stack_slot_offset0); -+ mv(t1, slot_step); ++ li(t0, stack_slot_offset0); ++ li(t1, slot_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(t0, mdp, t0); @@ -17110,8 +17592,8 @@ index 000000000..91deb0ae2 + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + -+ mv(t0, argument_type_offset0); -+ mv(t1, type_step); ++ li(t0, argument_type_offset0); ++ li(t1, type_step); + mul(t1, index, t1); + add(t0, t0, t1); + add(mdo_addr, mdp, t0); @@ -17123,7 +17605,7 @@ index 000000000..91deb0ae2 + + // increment index by 1 + addi(index, index, 1); -+ mv(t1, TypeProfileArgsLimit); ++ li(t1, TypeProfileArgsLimit); + blt(index, t1, loop); + bind(loopEnd); + @@ -17178,13 +17660,13 @@ index 000000000..91deb0ae2 + // length + Label do_profile; + lbu(t0, Address(xbcp, 0)); -+ mv(tmp, (u1)Bytecodes::_invokedynamic); ++ li(tmp, (u1)Bytecodes::_invokedynamic); + beq(t0, tmp, do_profile); -+ mv(tmp, (u1)Bytecodes::_invokehandle); ++ li(tmp, (u1)Bytecodes::_invokehandle); + beq(t0, tmp, do_profile); + get_method(tmp); + lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); -+ mv(t1, 
vmIntrinsics::_compiledLambdaForm); ++ li(t1, vmIntrinsics::_compiledLambdaForm); + bne(t0, t1, profile_continue); + bind(do_profile); + } @@ -17227,7 +17709,6 @@ index 000000000..91deb0ae2 + add(t0, mdp, off_base); + add(t1, mdp, type_base); + -+ + shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); + // load offset on the stack from the slot for this parameter + ld(tmp2, Address(tmp2, 0)); @@ -17287,12 +17768,12 @@ index 000000000..91deb0ae2 +#endif diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp new file mode 100644 -index 000000000..042ee8280 +index 0000000000..4126e8ee70 --- /dev/null +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -0,0 +1,283 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17439,7 +17920,7 @@ index 000000000..042ee8280 + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + -+// Load float value from 'address'. The value is loaded onto the FPU register v0. ++ // Load float value from 'address'. The value is loaded onto the FPU register v0. + void load_float(Address src); + void load_double(Address src); + @@ -17576,14 +18057,14 @@ index 000000000..042ee8280 +#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp new file mode 100644 -index 000000000..777f326e3 +index 0000000000..776b078723 --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp -@@ -0,0 +1,296 @@ +@@ -0,0 +1,295 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17637,8 +18118,9 @@ index 000000000..777f326e3 +FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { + if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { + return g_FPArgReg[_num_reg_fp_args++]; ++ } else { ++ return fnoreg; + } -+ return fnoreg; +} + +int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { @@ -17760,7 +18242,6 @@ index 000000000..777f326e3 + unsigned int _num_reg_int_args; + unsigned int _num_reg_fp_args; + -+ + intptr_t* single_slot_addr() { + intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; @@ -17793,7 +18274,6 @@ index 000000000..777f326e3 + *_to++ = value; + } + -+ + virtual void pass_int() { + jint value = *(jint*)single_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17801,7 +18281,6 @@ index 000000000..777f326e3 + } + } + -+ + virtual void pass_long() { + intptr_t value = *double_slot_addr(); + if (pass_gpr(value) < 0) { @@ -17826,7 +18305,7 @@ index 000000000..777f326e3 + } + } + -+ virtual void pass_double() { ++ virtual void pass_double() { + intptr_t value = *double_slot_addr(); + int arg = pass_fpr(value); + if (0 <= arg) { @@ -17844,12 +18323,13 @@ index 000000000..777f326e3 + _to = to; + + _int_args = to - (method->is_static() ? 16 : 17); -+ _fp_args = to - 8; ++ _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_reg_int_args = (method->is_static() ? 1 : 0); + _num_reg_fp_args = 0; + } ++ + ~SlowSignatureHandler() + { + _from = NULL; @@ -17871,19 +18351,19 @@ index 000000000..777f326e3 + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); -+ ssh.iterate((uint64_t)UCONST64(-1)); ++ ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp new file mode 100644 -index 000000000..06342869f +index 0000000000..05df63ba2a --- /dev/null +++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp @@ -0,0 +1,68 @@ +/* -+ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17952,14 +18432,13 @@ index 000000000..06342869f +#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp new file mode 100644 -index 000000000..a169b8c5f +index 0000000000..5a0c9b812f --- /dev/null +++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp @@ -0,0 +1,89 @@ +/* -+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17988,7 +18467,7 @@ index 000000000..a169b8c5f +private: + + // FP value associated with _last_Java_sp: -+ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore @@ -18038,23 +18517,24 @@ index 000000000..a169b8c5f + +public: + -+ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + -+ intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) -+ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } + +#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp new file mode 100644 -index 000000000..9bab8e78f +index 0000000000..f6e7351c4f --- /dev/null +++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp -@@ -0,0 +1,193 @@ +@@ -0,0 +1,194 @@ +/* -+ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18084,6 +18564,7 @@ index 000000000..9bab8e78f +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" ++#include "prims/jvmtiExport.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> @@ -18137,10 +18618,10 @@ index 000000000..9bab8e78f + __ bnez(t0, slow); + __ xorr(robj, c_rarg1, rcounter); + __ xorr(robj, robj, rcounter); // obj, since -+ // robj ^ rcounter ^ rcounter == robj -+ // robj is address dependent on rcounter. -+ ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. + ++ // Both robj and t0 are clobbered by try_resolve_jobject_in_native. + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_cond(bs != NULL); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); @@ -18150,6 +18631,7 @@ index 000000000..9bab8e78f + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + __ add(roffset, robj, roffset); ++ + switch (type) { + case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; + case T_BYTE: __ lb(result, Address(roffset, 0)); break; @@ -18170,7 +18652,6 @@ index 000000000..9bab8e78f + default: ShouldNotReachHere(); + } + -+ // counter_addr is address dependent on result. 
+ __ xorr(rcounter_addr, rcounter_addr, result); + __ xorr(rcounter_addr, rcounter_addr, result); + __ lw(t0, safepoint_counter_addr); @@ -18246,14 +18727,13 @@ index 000000000..9bab8e78f +} diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp new file mode 100644 -index 000000000..96775e0db +index 0000000000..df3c0267ee --- /dev/null +++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp -@@ -0,0 +1,108 @@ +@@ -0,0 +1,106 @@ +/* -+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -18280,7 +18760,6 @@ index 000000000..96775e0db +#define CPU_RISCV_JNITYPES_RISCV_HPP + +#include "jni.h" -+#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni @@ -18319,9 +18798,9 @@ index 000000000..96775e0db + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. -+ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } -+ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } -+ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } @@ -18360,14 +18839,14 @@ index 000000000..96775e0db +#endif // CPU_RISCV_JNITYPES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp new file mode 100644 -index 000000000..5d6078bb3 +index 0000000000..e18bd3d8e2 --- /dev/null +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp -@@ -0,0 +1,5861 @@ +@@ -0,0 +1,5410 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -18398,26 +18877,26 @@ index 000000000..5d6078bb3 +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/accessDecorators.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/klass.inline.hpp" ++#include "oops/oop.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/thread.hpp" -+#include "utilities/macros.hpp" -+#ifdef COMPILER1 -+#include "c1/c1_LIRAssembler.hpp" -+#endif +#ifdef COMPILER2 -+#include "oops/oop.hpp" +#include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" -+#include "opto/subnode.hpp" ++#include "opto/node.hpp" ++#include "opto/output.hpp" +#endif + +#ifdef PRODUCT @@ -18429,30 +18908,35 @@ index 000000000..5d6078bb3 + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg) { ++ assert_cond(masm != NULL); + masm->mv(c_rarg3, arg); + } +} + -+void MacroAssembler::align(int modulus) { -+ while (offset() % modulus != 0) { nop(); } ++void MacroAssembler::align(int modulus, int extra_offset) { ++ CompressibleRegion cr(this); ++ while ((offset() + extra_offset) % modulus != 0) { nop(); } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { @@ -18553,6 +19037,22 @@ index 000000000..5d6078bb3 +void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} +void MacroAssembler::check_and_handle_popframe(Register java_thread) {} + ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ ++ // load indirectly to solve generation ordering problem ++ ld(tmp, ExternalAddress((address) delayed_value_addr)); ++ ++ if (offset != 0) ++ add(tmp, tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ +// Calls to C land +// +// When entering C land, the fp, & esp of the last Java frame have to be recorded @@ -18604,11 +19104,36 @@ index 000000000..5d6078bb3 + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); + } else { ++ InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); + } +} + ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. 
We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + // we must set sp to zero to clear frame + sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); @@ -18693,7 +19218,6 @@ index 000000000..5d6078bb3 + sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); +} + -+ +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) { return; } + @@ -18710,11 +19234,10 @@ index 000000000..5d6078bb3 + push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); + + mv(c_rarg0, reg); // c_rarg0 : x10 -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. ++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18749,11 +19272,11 @@ index 000000000..5d6078bb3 + } else { + ld(x10, addr); + } -+ if(b != NULL) { -+ movptr(t0, (uintptr_t)(address)b); -+ } else { -+ ShouldNotReachHere(); -+ } ++ ++ // The length of the instruction sequence emitted should be independent ++ // of the values of the local char buffer address so that the size of mach ++ // nodes for scratch emit and normal emit matches. 
++ mv(t0, (address)b); + + // call indirectly to solve generation ordering problem + int32_t offset = 0; @@ -18803,51 +19326,47 @@ index 000000000..5d6078bb3 +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; -+ tty->print_cr(" pc = 0x%016" PRIX64, pc); ++ tty->print_cr(" pc = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif -+ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); -+ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); -+ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); -+ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); -+ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); -+ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); -+ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); -+ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); -+ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); -+ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); -+ tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); -+ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); -+ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); -+ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); -+ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); -+ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); -+ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); -+ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); -+ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); -+ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); -+ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); -+ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); -+ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); -+ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); -+ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); -+ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); -+ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); -+ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); -+ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); -+ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); -+ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); ++ tty->print_cr(" x0 = 0x%016lx", regs[0]); ++ tty->print_cr(" x1 = 0x%016lx", regs[1]); ++ tty->print_cr(" x2 = 0x%016lx", regs[2]); ++ tty->print_cr(" x3 = 0x%016lx", regs[3]); ++ tty->print_cr(" x4 = 0x%016lx", regs[4]); ++ tty->print_cr(" x5 = 0x%016lx", regs[5]); ++ tty->print_cr(" x6 = 0x%016lx", regs[6]); ++ tty->print_cr(" x7 = 0x%016lx", regs[7]); ++ tty->print_cr(" x8 = 0x%016lx", regs[8]); ++ tty->print_cr(" x9 = 0x%016lx", regs[9]); ++ tty->print_cr("x10 = 0x%016lx", regs[10]); ++ tty->print_cr("x11 = 0x%016lx", regs[11]); ++ tty->print_cr("x12 = 0x%016lx", regs[12]); ++ tty->print_cr("x13 = 0x%016lx", regs[13]); ++ tty->print_cr("x14 = 0x%016lx", regs[14]); ++ tty->print_cr("x15 = 0x%016lx", regs[15]); ++ tty->print_cr("x16 = 0x%016lx", regs[16]); ++ tty->print_cr("x17 = 0x%016lx", regs[17]); ++ tty->print_cr("x18 = 0x%016lx", regs[18]); ++ tty->print_cr("x19 = 0x%016lx", regs[19]); ++ tty->print_cr("x20 = 0x%016lx", regs[20]); ++ tty->print_cr("x21 = 0x%016lx", regs[21]); ++ tty->print_cr("x22 = 0x%016lx", regs[22]); ++ tty->print_cr("x23 = 0x%016lx", regs[23]); ++ tty->print_cr("x24 = 0x%016lx", regs[24]); ++ tty->print_cr("x25 = 0x%016lx", regs[25]); ++ tty->print_cr("x26 = 0x%016lx", regs[26]); ++ tty->print_cr("x27 = 0x%016lx", regs[27]); ++ tty->print_cr("x28 = 0x%016lx", regs[28]); ++ tty->print_cr("x30 = 0x%016lx", regs[30]); ++ tty->print_cr("x31 = 0x%016lx", regs[31]); + BREAKPOINT; + } -+ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -+ } else { -+ ttyLocker ttyl; -+ ::tty->print_cr("=============== DEBUG 
MESSAGE: %s ================\n", msg); -+ assert(false, "DEBUG MESSAGE: %s", msg); + } ++ fatal("DEBUG MESSAGE: %s", msg); +} + +void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { @@ -18873,13 +19392,12 @@ index 000000000..5d6078bb3 + +void MacroAssembler::stop(const char* msg) { + address ip = pc(); -+ push_reg(RegSet::range(x0, x31), sp); -+ if(msg != NULL && ip != NULL) { -+ mv(c_rarg0, (uintptr_t)(address)msg); -+ mv(c_rarg1, (uintptr_t)(address)ip); -+ } else { -+ ShouldNotReachHere(); -+ } ++ pusha(); ++ // The length of the instruction sequence emitted should be independent ++ // of the values of msg and ip so that the size of mach nodes for scratch ++ // emit and normal emit matches. ++ mv(c_rarg0, (address)msg); ++ mv(c_rarg1, (address)ip); + mv(c_rarg2, sp); + mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); + jalr(c_rarg3); @@ -19079,29 +19597,23 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::la(Register Rd, const Address &adr) { -+ code_section()->relocate(pc(), adr.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), adr.rspec()); + relocInfo::relocType rtype = adr.rspec().reloc()->type(); + -+ switch(adr.getMode()) { ++ switch (adr.getMode()) { + case Address::literal: { + if (rtype == relocInfo::none) { -+ mv(Rd, (intptr_t)(adr.target())); ++ li(Rd, (intptr_t)(adr.target())); + } else { + movptr(Rd, adr.target()); + } + break; + } -+ case Address::base_plus_offset:{ -+ Register base = adr.base(); -+ int64_t offset = adr.offset(); -+ if (offset == 0 && Rd != base) { -+ mv(Rd, base); -+ } else if (offset != 0 && Rd != base) { -+ add(Rd, base, offset, Rd); -+ } else if (offset != 0 && Rd == base) { -+ Register tmp = (Rd == t0) ? t1 : t0; -+ add(base, base, offset, tmp); -+ } ++ case Address::base_plus_offset: { ++ int32_t offset = 0; ++ baseOffset(Rd, adr, offset); ++ addi(Rd, Rd, offset); + break; + } + default: @@ -19144,26 +19656,31 @@ index 000000000..5d6078bb3 + + INSN(beq, feq, bnez); + INSN(bne, feq, beqz); ++ +#undef INSN + + +#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ + void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_s(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_s(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ + } \ + void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ + bool is_far, bool is_unordered) { \ -+ if(is_unordered) { \ ++ if (is_unordered) { \ ++ /* jump if either source is NaN or condition is expected */ \ + FLOATCMP2##_d(t0, Rs2, Rs1); \ + beqz(t0, l, is_far); \ + } else { \ ++ /* jump if no NaN in source and condition is expected */ \ + FLOATCMP1##_d(t0, Rs1, Rs2); \ + bnez(t0, l, is_far); \ + } \ @@ -19271,110 +19788,6 @@ index 000000000..5d6078bb3 + +#undef INSN + -+#ifdef COMPILER2 -+ -+typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); -+typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, -+ bool is_far, bool is_unordered); -+ -+static conditional_branch_insn conditional_branches[] = -+{ -+ /* SHORT branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgt, -+ NULL, // BoolTest::overflow -+ 
(conditional_branch_insn)&Assembler::blt, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::ble, -+ NULL, // BoolTest::no_overflow -+ (conditional_branch_insn)&Assembler::bge, -+ -+ /* UNSIGNED branches */ -+ (conditional_branch_insn)&Assembler::beq, -+ (conditional_branch_insn)&Assembler::bgtu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bltu, -+ (conditional_branch_insn)&Assembler::bne, -+ (conditional_branch_insn)&Assembler::bleu, -+ NULL, -+ (conditional_branch_insn)&Assembler::bgeu -+}; -+ -+static float_conditional_branch_insn float_conditional_branches[] = -+{ -+ /* FLOAT SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::float_beq, -+ (float_conditional_branch_insn)&MacroAssembler::float_bgt, -+ NULL, // BoolTest::overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_blt, -+ (float_conditional_branch_insn)&MacroAssembler::float_bne, -+ (float_conditional_branch_insn)&MacroAssembler::float_ble, -+ NULL, // BoolTest::no_overflow -+ (float_conditional_branch_insn)&MacroAssembler::float_bge, -+ -+ /* DOUBLE SHORT branches */ -+ (float_conditional_branch_insn)&MacroAssembler::double_beq, -+ (float_conditional_branch_insn)&MacroAssembler::double_bgt, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_blt, -+ (float_conditional_branch_insn)&MacroAssembler::double_bne, -+ (float_conditional_branch_insn)&MacroAssembler::double_ble, -+ NULL, -+ (float_conditional_branch_insn)&MacroAssembler::double_bge -+}; -+ -+void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), -+ "invalid conditional branch index"); -+ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); -+} -+ -+// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use -+// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). -+void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { -+ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), -+ "invalid float conditional branch index"); -+ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); -+ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, -+ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); -+} -+ -+void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ case BoolTest::le: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ case BoolTest::gt: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { -+ switch (cmpFlag) { -+ case BoolTest::eq: -+ beqz(op1, L, is_far); -+ break; -+ case BoolTest::ne: -+ bnez(op1, L, is_far); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { -+ Label L; -+ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); -+ mv(dst, src); -+ bind(L); -+} -+#endif -+ +void MacroAssembler::push_reg(Register Rs) +{ + addi(esp, esp, 0 - wordSize); @@ -19390,7 +19803,7 @@ index 000000000..5d6078bb3 +int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { + int count = 0; + // Scan bitset to accumulate register pairs -+ for (int reg = 31; reg >= 0; reg --) { ++ for (int reg = 31; reg >= 0; reg--) { + if ((1U << 31) & bitset) { + regs[count++] = reg; + } @@ -19403,6 +19816,7 @@ index 000000000..5d6078bb3 +// Return the number of words pushed +int MacroAssembler::push_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_pushed = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19424,6 +19838,7 @@ index 000000000..5d6078bb3 + +int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { + DEBUG_ONLY(int words_popped = 0;) ++ CompressibleRegion cr(this); + + unsigned char regs[32]; + int count = bitset_to_regs(bitset, regs); @@ -19443,13 +19858,52 @@ index 000000000..5d6078bb3 + return count; +} + -+RegSet MacroAssembler::call_clobbered_registers() { -+ // Push integer registers x7, x10-x17, x28-x31. -+ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++// Push float registers in the bitset, except sp. ++// Return the number of heapwords pushed. 
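The float push/pop that follow reuse the same bitset walk as push_reg()/pop_reg() above: the mask is scanned from the most-significant bit down, so the collected register numbers come out in descending order. A standalone C++ sketch of that scan (illustration only, not the HotSpot code; the helper name is made up):

#include <cstdint>

// Collect the register numbers selected by a 32-bit mask, scanning from
// bit 31 down to bit 0, mirroring the loop in bitset_to_regs() above.
static int collect_regs(uint32_t bitset, unsigned char regs[32]) {
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if (bitset & (1u << 31)) {   // test the bit currently sitting at position 31
      regs[count++] = (unsigned char)reg;
    }
    bitset <<= 1;                // move the next lower bit up into position 31
  }
  return count;                  // number of registers the caller will push/pop
}

// e.g. collect_regs(0x80000400, regs) yields regs = {31, 10}, count = 2.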
++int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_pushed = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int push_slots = count + (count & 1); ++ ++ if (count) { ++ addi(stack, stack, -push_slots * wordSize); ++ } ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize)); ++ words_pushed++; ++ } ++ ++ assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count); ++ return count; ++} ++ ++int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ CompressibleRegion cr(this); ++ int words_popped = 0; ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ int pop_slots = count + (count & 1); ++ ++ for (int i = count - 1; i >= 0; i--) { ++ fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize)); ++ words_popped++; ++ } ++ ++ if (count) { ++ addi(stack, stack, pop_slots * wordSize); ++ } ++ ++ assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count); ++ return count; +} + -+void MacroAssembler::push_call_clobbered_registers() { -+ push_reg(call_clobbered_registers(), sp); ++void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); ++ // Push integer registers x7, x10-x17, x28-x31. ++ push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); + + // Push float registers f0-f7, f10-f17, f28-f31. + addi(sp, sp, - wordSize * 20); @@ -19461,7 +19915,8 @@ index 000000000..5d6078bb3 + } +} + -+void MacroAssembler::pop_call_clobbered_registers() { ++void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) { ++ CompressibleRegion cr(this); + int offset = 0; + for (int i = 0; i < 32; i++) { + if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { @@ -19470,39 +19925,35 @@ index 000000000..5d6078bb3 + } + addi(sp, sp, wordSize * 20); + -+ pop_reg(call_clobbered_registers(), sp); ++ pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp); ++} ++ ++// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). ++void MacroAssembler::pusha() { ++ CompressibleRegion cr(this); ++ push_reg(0xffffffe2, sp); ++} ++ ++// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4). 
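The literal masks used by pusha()/popa() and push_CPU_state()/pop_CPU_state() below can be read as "all 32 integer registers minus the excluded ones". A small self-contained check of that arithmetic (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t all_regs = 0xffffffffu;
  // pusha()/popa(): drop zr(x0), sp(x2), gp(x3), tp(x4); keep everything else.
  const uint32_t pusha_mask = all_regs & ~((1u << 0) | (1u << 2) | (1u << 3) | (1u << 4));
  // push_CPU_state()/pop_CPU_state(): additionally drop ra(x1).
  const uint32_t cpu_state_mask = pusha_mask & ~(1u << 1);

  assert(pusha_mask == 0xffffffe2u);
  assert(cpu_state_mask == 0xffffffe0u);
  return 0;
}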
++void MacroAssembler::popa() { ++ CompressibleRegion cr(this); ++ pop_reg(0xffffffe2, sp); +} + -+void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++void MacroAssembler::push_CPU_state() { ++ CompressibleRegion cr(this); + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ push_reg(RegSet::range(x5, x31), sp); ++ push_reg(0xffffffe0, sp); + + // float registers + addi(sp, sp, - 32 * wordSize); + for (int i = 0; i < 32; i++) { + fsd(as_FloatRegister(i), Address(sp, i * wordSize)); + } -+ -+ // vector registers -+ if (save_vectors) { -+ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ add(t0, sp, vector_size_in_bytes * i); -+ vse64_v(as_VectorRegister(i), t0); -+ } -+ } +} + -+void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { -+ // vector registers -+ if (restore_vectors) { -+ vsetvli(t0, x0, Assembler::e64, Assembler::m8); -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { -+ vle64_v(as_VectorRegister(i), sp); -+ add(sp, sp, vector_size_in_bytes * 8); -+ } -+ } ++void MacroAssembler::pop_CPU_state() { ++ CompressibleRegion cr(this); + + // float registers + for (int i = 0; i < 32; i++) { @@ -19511,7 +19962,7 @@ index 000000000..5d6078bb3 + addi(sp, sp, 32 * wordSize); + + // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) -+ pop_reg(RegSet::range(x5, x31), sp); ++ pop_reg(0xffffffe0, sp); +} + +static int patch_offset_in_jal(address branch, int64_t offset) { @@ -19661,10 +20112,14 @@ index 000000000..5d6078bb3 + int64_t imm = (intptr_t)target; + return patch_imm_in_li32(branch, (int32_t)imm); + } else { -+ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); ++#ifdef ASSERT ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n", ++ *(unsigned*)branch, p2i(branch)); ++ Disassembler::decode(branch - 16, branch + 16); ++#endif + ShouldNotReachHere(); ++ return -1; + } -+ return -1; +} + +address MacroAssembler::target_addr_for_insn(address insn_addr) { @@ -19721,6 +20176,13 @@ index 000000000..5d6078bb3 + code_section()->relocate(pc(), dest.rspec()); + movptr(Rd, dest.target()); +} ++ ++void MacroAssembler::mv(Register Rd, address addr) { ++ // Here in case of use with relocation, use fix length instruction ++ // movptr instead of li ++ movptr(Rd, addr); ++} ++ +void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { + if (src.is_register()) { + mv(Rd, src.as_register()); @@ -19795,22 +20257,6 @@ index 000000000..5d6078bb3 + } +} + -+// rotate right with imm bits -+void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) -+{ -+ if (UseZbb) { -+ rori(dst, src, shift); -+ return; -+ } -+ -+ assert_different_registers(dst, tmp); -+ assert_different_registers(src, tmp); -+ assert(shift < 64, "shift amount must be < 64"); -+ slli(tmp, src, 64 - shift); -+ srli(dst, src, shift); -+ orr(dst, dst, tmp); -+} -+ +// reverse bytes in halfword in lower 16 bits and sign-extend +// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) +void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { @@ -19894,6 +20340,7 @@ index 000000000..5d6078bb3 + slli(Rd, Rs, 16); + orr(Rd, Rd, tmp1); +} ++ +// reverse bytes in each halfword +// Rd[63:0] = Rs[55:48] Rs[63:56] 
Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] +void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { @@ -19954,12 +20401,28 @@ index 000000000..5d6078bb3 + orr(Rd, tmp1, Rd); +} + ++// rotate right with shift bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } ++ ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} ++ +void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { + if (is_imm_in_range(imm, 12, 0)) { + and_imm12(Rd, Rn, imm); + } else { + assert_different_registers(Rn, tmp); -+ mv(tmp, imm); ++ li(tmp, imm); + andr(Rd, Rn, tmp); + } +} @@ -19969,11 +20432,11 @@ index 000000000..5d6078bb3 + if (src.is_register()) { + orr(tmp1, tmp1, src.as_register()); + } else { -+ if(is_imm_in_range(src.as_constant(), 12, 0)) { ++ if (is_imm_in_range(src.as_constant(), 12, 0)) { + ori(tmp1, tmp1, src.as_constant()); + } else { + assert_different_registers(tmp1, tmp2); -+ mv(tmp2, src.as_constant()); ++ li(tmp2, src.as_constant()); + orr(tmp1, tmp1, tmp2); + } + } @@ -19996,7 +20459,7 @@ index 000000000..5d6078bb3 +} + +// Move an oop into a register. immediate is true if we want -+// immediate instrcutions, i.e. we are not going to patch this ++// immediate instructions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { @@ -20062,6 +20525,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ assert_cond(masm != NULL); + int32_t offset = 0; + _masm = masm; + _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); @@ -20070,6 +20534,7 @@ index 000000000..5d6078bb3 +} + +SkipIfEqual::~SkipIfEqual() { ++ assert_cond(_masm != NULL); + _masm->bind(_label); + _masm = NULL; +} @@ -20116,14 +20581,14 @@ index 000000000..5d6078bb3 + +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, -+ Register tmp1, Register tmp2, Register tmp3) { ++ Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { -+ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { -+ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + @@ -20200,7 +20665,8 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); ++ li(xbase, (uintptr_t)Universe::narrow_klass_base()); ++ + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert_different_registers(t0, xbase); @@ -20208,8 +20674,8 @@ index 000000000..5d6078bb3 + } else { + add(dst, xbase, src); + } -+ if (xbase == xheapbase) { reinit_heapbase(); } + ++ if (xbase 
== xheapbase) { reinit_heapbase(); } +} + +void MacroAssembler::encode_klass_not_null(Register r) { @@ -20241,7 +20707,7 @@ index 000000000..5d6078bb3 + } + + assert_different_registers(src, xbase); -+ mv(xbase, (intptr_t)Universe::narrow_klass_base()); ++ li(xbase, (intptr_t)Universe::narrow_klass_base()); + sub(dst, src, xbase); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); @@ -20290,8 +20756,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, -+ Register tmp2, Register tmp3, DecoratorSet decorators) { -+ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, @@ -20306,7 +20772,7 @@ index 000000000..5d6078bb3 + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { -+ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); +} + +int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, @@ -20394,7 +20860,7 @@ index 000000000..5d6078bb3 + if (itable_index.is_register()) { + slli(t0, itable_index.as_register(), 3); + } else { -+ mv(t0, itable_index.as_constant() << 3); ++ li(t0, itable_index.as_constant() << 3); + } + add(recv_klass, recv_klass, t0); + if (itentry_off) { @@ -20439,17 +20905,11 @@ index 000000000..5d6078bb3 + ld(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; -+ Address addr = form_address(recv_klass, /* base */ -+ vtable_offset_in_bytes, /* offset */ -+ 12, /* expect offset bits */ -+ method_result); /* temp reg */ -+ ld(method_result, addr); ++ ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes)); + } +} + +void MacroAssembler::membar(uint32_t order_constraint) { -+ if (!os::is_MP()) { return; } -+ + address prev = pc() - NativeMembar::instruction_size; + address last = code()->last_insn(); + @@ -20470,6 +20930,21 @@ index 000000000..5d6078bb3 + } +} + ++// Form an addres from base + offset in Rd. Rd my or may not ++// actually be used: you must use the Address that is returned. It ++// is up to you to ensure that the shift provided mathces the size ++// of your data. ++Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) { ++ if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12 ++ return Address(base, byte_offset); ++ } ++ ++ // Do it the hard way ++ mv(Rd, byte_offset); ++ add(Rd, base, Rd); ++ return Address(Rd); ++} ++ +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register tmp_reg, @@ -20480,21 +20955,6 @@ index 000000000..5d6078bb3 + bind(L_failure); +} + -+// Write serialization page so VM thread can do a pseudo remote membar. -+// We use the current thread pointer to calculate a thread specific -+// offset to write to within the page. This minimizes bus traffic -+// due to cache line collision. 
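The form_address() helper added above only folds the displacement into the addressing mode when it fits RISC-V's signed 12-bit immediate; otherwise the offset is first materialized into the scratch register. A minimal sketch of that range test (the architectural limit, not the exact HotSpot helper):

#include <cstdint>

// RISC-V loads/stores encode a signed 12-bit byte displacement: [-2048, 2047].
static bool fits_in_simm12(int64_t byte_offset) {
  return byte_offset >= -2048 && byte_offset <= 2047;
}

// fits_in_simm12(2047) -> true : Address(base, offset) can be used directly
// fits_in_simm12(4096) -> false: the offset is moved into a register, added
//                                to the base, and Address(tmp) is used instead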
-+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { -+ srli(tmp2, thread, os::get_serialize_page_shift_count()); -+ -+ int mask = os::vm_page_size() - sizeof(int); -+ andi(tmp2, tmp2, mask, tmp1); -+ -+ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); -+ membar(MacroAssembler::AnyAny); -+ sw(zr, Address(tmp1)); -+} -+ +void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ld(t1, Address(xthread, Thread::polling_page_offset())); @@ -20509,30 +20969,6 @@ index 000000000..5d6078bb3 + } +} + -+// Just like safepoint_poll, but use an acquiring load for thread- -+// local polling. -+// -+// We need an acquire here to ensure that any subsequent load of the -+// global SafepointSynchronize::_state flag is ordered after this load -+// of the local Thread::_polling page. We don't want this poll to -+// return false (i.e. not safepointing) and a later poll of the global -+// SafepointSynchronize::_state spuriously to return true. -+// -+// This is to avoid a race when we're in a native->Java transition -+// racing the code which wakes up from a safepoint. -+// -+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { -+ if (SafepointMechanism::uses_thread_local_poll()) { -+ membar(MacroAssembler::AnyAny); -+ ld(t1, Address(xthread, Thread::polling_page_offset())); -+ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ andi(t0, t1, SafepointMechanism::poll_bit()); -+ bnez(t0, slow_path); -+ } else { -+ safepoint_poll(slow_path); -+ } -+} -+ +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value @@ -20540,17 +20976,16 @@ index 000000000..5d6078bb3 + // addr identifies memory word to compare against/update + Label retry_load, nope; + bind(retry_load); -+ // flush and load exclusive from the memory location -+ // and fail if it is not what we expect ++ // Load reserved from the memory location + lr_d(tmp, addr, Assembler::aqrl); ++ // Fail and exit if it is not what we expect + bne(tmp, oldv, nope); -+ // if we store+flush with no intervening write tmp wil be zero ++ // If the store conditional succeeds, tmp will be zero + sc_d(tmp, newv, addr, Assembler::rl); + beqz(tmp, succeed); -+ // retry so we only ever return after a load fails to compare -+ // ensures we don't return a stale value after a failed write. 
++ // Retry only when the store conditional failed + j(retry_load); -+ // if the memory word differs we return it in oldv and signal a fail ++ + bind(nope); + membar(AnyAny); + mv(oldv, tmp); @@ -20616,9 +21051,10 @@ index 000000000..5d6078bb3 + andi(aligned_addr, addr, ~3); + + if (size == int8) { -+ mv(mask, 0xff); ++ addi(mask, zr, 0xff); + } else { -+ mv(mask, -1); ++ // size == int16 case ++ addi(mask, zr, -1); + zero_extend(mask, mask, 16); + } + sll(mask, mask, shift); @@ -20658,7 +21094,7 @@ index 000000000..5d6078bb3 + bnez(tmp, retry); + + if (result_as_bool) { -+ mv(result, 1); ++ addi(result, zr, 1); + j(done); + + bind(fail); @@ -20670,16 +21106,16 @@ index 000000000..5d6078bb3 + + bind(fail); + srl(result, tmp, shift); -+ } + -+ if (size == int8) { -+ sign_extend(result, result, 8); -+ } else if (size == int16) { -+ sign_extend(result, result, 16); ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else { ++ // size == int16 case ++ sign_extend(result, result, 16); ++ } + } +} + -+// weak cmpxchg narrow value will kill t0, t1, expected, new_val and tmps. +// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement +// the weak CAS stuff. The major difference is that it just failed when store conditional +// failed. @@ -20693,7 +21129,7 @@ index 000000000..5d6078bb3 + assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); + cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); + -+ Label fail, done; ++ Label succ, fail, done; + + lr_w(old, aligned_addr, acquire); + andr(tmp, old, mask); @@ -20702,14 +21138,13 @@ index 000000000..5d6078bb3 + andr(tmp, old, not_mask); + orr(tmp, tmp, new_val); + sc_w(tmp, tmp, aligned_addr, release); -+ bnez(tmp, fail); ++ beqz(tmp, succ); + -+ // Success -+ mv(result, 1); ++ bind(fail); ++ addi(result, zr, 1); + j(done); + -+ // Fail -+ bind(fail); ++ bind(succ); + mv(result, zr); + + bind(done); @@ -20731,7 +21166,7 @@ index 000000000..5d6078bb3 + + // equal, succeed + if (result_as_bool) { -+ mv(result, 1); ++ li(result, 1); + } else { + mv(result, expected); + } @@ -20753,22 +21188,20 @@ index 000000000..5d6078bb3 + enum operand_size size, + Assembler::Aqrl acquire, Assembler::Aqrl release, + Register result) { -+ assert(size != int8 && size != int16, "unsupported operand size"); -+ -+ Label fail, done; ++ Label fail, done, sc_done; + load_reserved(addr, size, acquire); + bne(t0, expected, fail); + store_conditional(addr, new_val, size, release); -+ bnez(t0, fail); -+ -+ // Success -+ mv(result, 1); -+ j(done); ++ beqz(t0, sc_done); + -+ // Fail ++ // fail + bind(fail); -+ mv(result, zr); ++ li(result, 1); ++ j(done); + ++ // sc_done ++ bind(sc_done); ++ mv(result, 0); + bind(done); +} + @@ -20817,229 +21250,7 @@ index 000000000..5d6078bb3 + +#undef ATOMIC_XCHGU + -+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ -+ // Check for biased locking unlock case, which is a no-op -+ // Note: we do not have to check the thread ID for two reasons. -+ // First, the interpreter checks for IllegalMonitorStateException at -+ // a higher level. Second, if the bias was revoked while we held the -+ // lock, the object could not be rebiased toward another thread, so -+ // the bias bit would be clear. 
-+ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); -+ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 -+ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); -+ if (flag->is_valid()) { mv(flag, tmp_reg); } -+ beqz(tmp_reg, done); -+} -+ -+void MacroAssembler::load_prototype_header(Register dst, Register src) { -+ load_klass(dst, src); -+ ld(dst, Address(dst, Klass::prototype_header_offset())); -+} -+ -+int MacroAssembler::biased_locking_enter(Register lock_reg, -+ Register obj_reg, -+ Register swap_reg, -+ Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, -+ Label* slow_case, -+ BiasedLockingCounters* counters, -+ Register flag) { -+ assert(UseBiasedLocking, "why call this otherwise?"); -+ assert_different_registers(lock_reg, obj_reg, swap_reg); -+ -+ if (PrintBiasedLockingStatistics && counters == NULL) { -+ counters = BiasedLocking::counters(); -+ } -+ -+ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); -+ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); -+ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); -+ -+ // Biased locking -+ // See whether the lock is currently biased toward our thread and -+ // whether the epoch is still valid -+ // Note that the runtime guarantees sufficient alignment of JavaThread -+ // pointers to allow age to be placed into low bits -+ // First check to see whether biasing is even enabled for this object -+ Label cas_label; -+ int null_check_offset = -1; -+ if (!swap_reg_contains_mark) { -+ null_check_offset = offset(); -+ ld(swap_reg, mark_addr); -+ } -+ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); -+ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); -+ bnez(t0, cas_label); // don't care flag unless jumping to done -+ // The bias pattern is present in the object's header. Need to check -+ // whether the bias owner and the epoch are both still current. -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, tmp_reg, xthread); -+ xorr(tmp_reg, swap_reg, tmp_reg); -+ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); -+ if (flag->is_valid()) { -+ mv(flag, tmp_reg); -+ } -+ -+ if (counters != NULL) { -+ Label around; -+ bnez(tmp_reg, around); -+ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); -+ j(done); -+ bind(around); -+ } else { -+ beqz(tmp_reg, done); -+ } -+ -+ Label try_revoke_bias; -+ Label try_rebias; -+ -+ // At this point we know that the header has the bias pattern and -+ // that we are not the bias owner in the current epoch. We need to -+ // figure out more details about the state of the header in order to -+ // know what operations can be legally performed on the object's -+ // header. -+ -+ // If the low three bits in the xor result aren't clear, that means -+ // the prototype header is no longer biased and we have to revoke -+ // the bias on this object. -+ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); -+ bnez(t0, try_revoke_bias); -+ -+ // Biasing is still enabled for this data type. See whether the -+ // epoch of the current bias is still valid, meaning that the epoch -+ // bits of the mark word are equal to the epoch bits of the -+ // prototype header. (Note that the prototype header's epoch bits -+ // only change at a safepoint.) If not, attempt to rebias the object -+ // toward the current thread. 
Note that we must be absolutely sure -+ // that the current epoch is invalid in order to do this because -+ // otherwise the manipulations it performs on the mark word are -+ // illegal. -+ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); -+ bnez(t0, try_rebias); -+ -+ // The epoch of the current bias is still valid but we know nothing -+ // about the owner; it might be set or it might be clear. Try to -+ // acquire the bias of the object using an atomic operation. If this -+ // fails we will go in to the runtime to revoke the object's bias. -+ // Note that we first construct the presumed unbiased header so we -+ // don't accidentally blow away another thread's valid bias. -+ { -+ Label cas_success; -+ Label counter; -+ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); -+ andr(swap_reg, swap_reg, t0); -+ orr(tmp_reg, swap_reg, xthread); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, this means that -+ // another thread succeeded in biasing it toward itself and we -+ // need to revoke that bias. The revocation will occur in the -+ // interpreter runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ bind(try_rebias); -+ // At this point we know the epoch has expired, meaning that the -+ // current "bias owner", if any, is actually invalid. Under these -+ // circumstances _only_, we are allowed to use the current header's -+ // value as the comparison value when doing the cas to acquire the -+ // bias in the current epoch. In other words, we allow transfer of -+ // the bias from one thread to another directly in this situation. -+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success; -+ Label counter; -+ load_prototype_header(tmp_reg, obj_reg); -+ orr(tmp_reg, xthread, tmp_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); -+ // cas failed here if slow_cass == NULL -+ if (flag->is_valid()) { -+ mv(flag, 1); -+ j(counter); -+ } -+ -+ // If the biasing toward our thread failed, then another thread -+ // succeeded in biasing it toward itself and we need to revoke that -+ // bias. The revocation will occur in the runtime in the slow case. -+ bind(cas_success); -+ if (flag->is_valid()) { -+ mv(flag, 0); -+ bind(counter); -+ } -+ -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), -+ tmp_reg, t0); -+ } -+ } -+ j(done); -+ -+ // don't care flag unless jumping to done -+ bind(try_revoke_bias); -+ // The prototype mark in the klass doesn't have the bias bit set any -+ // more, indicating that objects of this data type are not supposed -+ // to be biased any more. We are going to try to reset the mark of -+ // this object to the prototype value and fall through to the -+ // CAS-based locking scheme. Note that if our CAS fails, it means -+ // that another thread raced us for the privilege of revoking the -+ // bias of this particular object, so it's okay to continue in the -+ // normal locking code. 
-+ // -+ // FIXME: due to a lack of registers we currently blow away the age -+ // bits in this situation. Should attempt to preserve them. -+ { -+ Label cas_success, nope; -+ load_prototype_header(tmp_reg, obj_reg); -+ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); -+ bind(cas_success); -+ -+ // Fall through to the normal CAS-based lock, because no matter what -+ // the result of the above CAS, some thread must have succeeded in -+ // removing the bias bit from the object's header. -+ if (counters != NULL) { -+ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, -+ t0); -+ } -+ bind(nope); -+ } -+ -+ bind(cas_label); -+ -+ return null_check_offset; -+} -+ -+void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { -+ Label retry_load; -+ bind(retry_load); -+ // flush and load exclusive from the memory location -+ lr_w(tmp, counter_addr); -+ addw(tmp, tmp, 1); -+ // if we store+flush with no intervening write tmp wil be zero -+ sc_w(tmp, tmp, counter_addr); -+ bnez(tmp, retry_load); -+} -+ -+void MacroAssembler::far_jump(Address entry, Register tmp) { ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21048,13 +21259,15 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. + la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x0, tmp, offset); + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + j(entry); + } +} + -+void MacroAssembler::far_call(Address entry, Register tmp) { ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); @@ -21063,8 +21276,10 @@ index 000000000..5d6078bb3 + // We can use auipc + jalr here because we know that the total size of + // the code cache cannot exceed 2Gb. 
+ la_patchable(tmp, entry, offset); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jalr(x1, tmp, offset); // link + } else { ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + jal(entry); // link + } +} @@ -21079,7 +21294,7 @@ index 000000000..5d6078bb3 + assert_different_registers(sub_klass, super_klass, tmp_reg); + bool must_load_sco = (super_check_offset == noreg); + if (must_load_sco) { -+ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); ++ assert(tmp_reg != noreg, "supply either a temp or a register offset"); + } else { + assert_different_registers(sub_klass, super_klass, super_check_offset); + } @@ -21160,15 +21375,15 @@ index 000000000..5d6078bb3 + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg); + if (tmp2_reg != noreg) { -+ assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0); ++ assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0); + } -+#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg) ++#define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg) + + Label L_fallthrough; + int label_nulls = 0; @@ -21177,7 +21392,7 @@ index 000000000..5d6078bb3 + + assert(label_nulls <= 1, "at most one NULL in the batch"); + -+ // A couple of useful fields in sub_klass: ++ // A couple of usefule fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); @@ -21228,14 +21443,14 @@ index 000000000..5d6078bb3 + add(x15, x15, Array::base_offset_in_bytes()); + + // Set t0 to an obvious invalid value, falling through by default -+ mv(t0, -1); ++ li(t0, -1); + // Scan X12 words at [X15] for an occurrence of X10. + repne_scan(x15, x10, x12, t0); + + // pop will restore x10, so we should use a temp register to keep its value + mv(t1, x10); + -+ // Unspill the temp. registers: ++ // Unspill the temp registers: + pop_reg(pushed_registers, sp); + + bne(t1, t0, *L_failure); @@ -21268,26 +21483,28 @@ index 000000000..5d6078bb3 +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, -+ Register tmp1, ++ Register tmp, + Label& slow_case, + bool is_far) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far); +} + + +// get_thread() can be called anywhere inside generated code so we +// need to save whatever non-callee save context might get clobbered -+// by the call to Thread::current() or, indeed, the call setup code ++// by the call to Thread::current() or, indeed, the call setup code. 
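The slow-path subtype check above boils down to a linear scan of the secondary-supers array (the repne_scan loop over x12 words at [x15]); in plain C++ the search looks roughly like this (hypothetical helper, not HotSpot code):

#include <cstddef>

// Walk 'len' pointer-sized slots and report whether 'super' occurs.
// On a hit the real code also stores the super into the
// secondary_super_cache so the next fast-path check for the same
// pair succeeds immediately.
static bool secondary_supers_contain(void* const* supers, size_t len, const void* super) {
  for (size_t i = 0; i < len; i++) {
    if (supers[i] == super) {
      return true;
    }
  }
  return false;
}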
+void MacroAssembler::get_thread(Register thread) { + // save all call-clobbered regs except thread -+ RegSet saved_regs = RegSet::of(x10) + ra - thread; ++ RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) + ++ RegSet::range(x28, x31) + ra - thread; + push_reg(saved_regs, sp); + -+ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); -+ jalr(ra); -+ if (thread != c_rarg0) { -+ mv(thread, c_rarg0); ++ int32_t offset = 0; ++ movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset); ++ jalr(ra, ra, offset); ++ if (thread != x10) { ++ mv(thread, x10); + } + + // restore pushed registers @@ -21295,8 +21512,9 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::load_byte_map_base(Register reg) { -+ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); -+ mv(reg, (uint64_t)byte_map_base); ++ jbyte *byte_map_base = ++ ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { @@ -21310,11 +21528,12 @@ index 000000000..5d6078bb3 + assert(is_valid_riscv64_address(dest.target()), "bad address"); + assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); + -+ code_section()->relocate(pc(), dest.rspec()); ++ InstructionMark im(this); ++ code_section()->relocate(inst_mark(), dest.rspec()); + // RISC-V doesn't compute a page-aligned address, in order to partially + // compensate for the use of *signed* offsets in its base+disp12 + // addressing mode (RISC-V's PC-relative reach remains asymmetric -+ // [-(2G + 2K), 2G - 2K)). ++ // [-(2G + 2K), 2G - 2k). + if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { + int64_t distance = dest.target() - pc(); + auipc(reg1, (int32_t)distance + 0x800); @@ -21325,7 +21544,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::build_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + sub(sp, sp, framesize); + sd(fp, Address(sp, framesize - 2 * wordSize)); + sd(ra, Address(sp, framesize - wordSize)); @@ -21333,7 +21553,8 @@ index 000000000..5d6078bb3 +} + +void MacroAssembler::remove_frame(int framesize) { -+ assert(framesize > 0, "framesize must be > 0"); ++ assert(framesize >= 2, "framesize must include space for FP/RA"); ++ assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + ld(fp, Address(sp, framesize - 2 * wordSize)); + ld(ra, Address(sp, framesize - wordSize)); + add(sp, sp, framesize); @@ -21364,6 +21585,222 @@ index 000000000..5d6078bb3 + bind(no_reserved_zone_enabling); +} + ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool 
swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) ++ counters = BiasedLocking::counters(); ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ li(t0, markOopDesc::biased_lock_pattern); ++ bne(t0, tmp_reg, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. 
++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ +// Move the address of the polling page into dest. +void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { @@ -21375,7 +21812,8 @@ index 000000000..5d6078bb3 + } +} + -+// Move the address of the polling page into dest. ++// Read the polling page. The address of the polling page must ++// already be in r. +void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { + int32_t offset = 0; + get_polling_page(dest, page, offset, rtype); @@ -21384,9 +21822,9 @@ index 000000000..5d6078bb3 + +// Read the polling page. The address of the polling page must +// already be in r. -+void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++void MacroAssembler::read_polling_page(Register dest, int32_t offset, relocInfo::relocType rtype) { + code_section()->relocate(pc(), rtype); -+ lwu(zr, Address(r, offset)); ++ lwu(zr, Address(dest, offset)); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { @@ -21400,8 +21838,9 @@ index 000000000..5d6078bb3 + } +#endif + int oop_index = oop_recorder()->find_index(obj); ++ InstructionMark im(this); + RelocationHolder rspec = oop_Relocation::spec(oop_index); -+ code_section()->relocate(pc(), rspec); ++ code_section()->relocate(inst_mark(), rspec); + li32(dst, 0xDEADBEEF); + zero_extend(dst, dst, 32); +} @@ -21412,8 +21851,9 @@ index 000000000..5d6078bb3 + int index = oop_recorder()->find_index(k); + assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); + ++ InstructionMark im(this); + RelocationHolder rspec = metadata_Relocation::spec(index); -+ code_section()->relocate(pc(), rspec); ++ code_section()->relocate(inst_mark(), rspec); + narrowKlass nk = Klass::encode_klass(k); + li32(dst, nk); + zero_extend(dst, dst, 32); @@ -21421,7 +21861,7 @@ index 000000000..5d6078bb3 + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. 
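trampoline_call() below emits an extra stub when the target may lie outside the direct reach of a jal, while the far path goes through auipc + jalr. A sketch of the reachability question using the ISA limits (approximate, not HotSpot's exact far_branches() predicate):

#include <cstdint>

// jal carries a signed 21-bit, 2-byte-aligned offset: roughly +/-1 MiB.
// The auipc + jalr pair used on the far/trampoline path reaches about
// +/-2 GiB, which is why it is safe whenever the whole code cache fits
// in that range.
static bool reachable_by_jal(int64_t from, int64_t to) {
  const int64_t distance = to - from;          // must also be even in practice
  return distance >= -(int64_t(1) << 20) && distance < (int64_t(1) << 20);
}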
-+address MacroAssembler::trampoline_call(Address entry) { ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type || + entry.rspec().type() == relocInfo::opt_virtual_call_type || @@ -21442,22 +21882,22 @@ index 000000000..5d6078bb3 + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { -+ postcond(pc() == badAddress); ++ postcond(pc() == badAddress); + return NULL; // CodeCache is full + } + } + } + -+ address call_pc = pc(); ++ if (cbuf != NULL) { cbuf->set_insts_mark(); } + relocate(entry.rspec()); + if (!far_branches()) { + jal(entry.target()); + } else { + jal(pc()); + } -+ ++ // just need to return a non-null address + postcond(pc() != badAddress); -+ return call_pc; ++ return pc(); +} + +address MacroAssembler::ic_call(address entry, jint method_index) { @@ -21480,8 +21920,8 @@ index 000000000..5d6078bb3 + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { -+ // Max stub size: alignment nop, TrampolineStub. -+ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); ++ address stub = start_a_stub(NativeInstruction::instruction_size ++ + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } @@ -21492,7 +21932,8 @@ index 000000000..5d6078bb3 + + // make sure 4 byte aligned here, so that the destination address would be + // 8 byte aligned after 3 intructions -+ while (offset() % wordSize == 0) { nop(); } ++ // when we reach here we may get a 2-byte alignment so need to align it ++ align(wordSize, NativeCallTrampolineStub::data_offset); + + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); @@ -21507,6 +21948,7 @@ index 000000000..5d6078bb3 + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); ++ assert(offset() % wordSize == 0, "bad alignment"); + emit_int64((intptr_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); @@ -21522,54 +21964,26 @@ index 000000000..5d6078bb3 + case Address::base_plus_offset: + // This is the expected mode, although we allow all the other + // forms below. 
-+ return form_address(dst.base(), dst.offset(), 12, t1); ++ return form_address(t1, dst.base(), dst.offset()); + default: + la(t1, dst); + return Address(t1); + } +} + -+void MacroAssembler::increment(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ ld(t0, adr); -+ add(t0, t0, value, t1); -+ sd(t0, adr); -+} -+ -+void MacroAssembler::incrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); -+ Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address increment"); -+ lwu(t0, adr); -+ addw(t0, t0, value, t1); -+ sw(t0, adr); -+} -+ -+void MacroAssembler::decrement(const Address dst, int64_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ assert_different_registers(adr.base(), t0); + ld(t0, adr); -+ sub(t0, t0, value, t1); ++ addi(t0, t0, imm); + sd(t0, adr); +} + -+void MacroAssembler::decrementw(const Address dst, int32_t value) { -+ assert(((dst.getMode() == Address::base_plus_offset && -+ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), -+ "invalid value and address mode combination"); ++void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) { + Address adr = add_memory_helper(dst); -+ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ assert_different_registers(adr.base(), t0); + lwu(t0, adr); -+ subw(t0, t0, value, t1); ++ addiw(t0, t0, imm); + sw(t0, adr); +} + @@ -21581,2693 +21995,2308 @@ index 000000000..5d6078bb3 + beq(src1, t0, equal); +} + -+void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_equals(this, obj1, obj2, equal, is_far); ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zeros, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zeros); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zeros, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); +} + -+void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { -+ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); -+ bs->obj_nequals(this, obj1, obj2, nequal, is_far); ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns and corresponding indices would be like: ++// - 0x8080808080808080 (Latin1) ++// - 7 6 5 4 3 2 1 0 (match index) ++// - 0x8000800080008000 (UTF16) ++// - 3 2 1 0 (match index) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); +} + +#ifdef COMPILER2 -+// Set dst NaN if either source is NaN. -+void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, -+ bool is_double, bool is_min) { -+ assert_different_registers(dst, src1, src2); -+ Label Ldone; -+ fsflags(zr); -+ if (is_double) { -+ if (is_min) { -+ fmin_d(dst, src1, src2); -+ } else { -+ fmax_d(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_d(zr, src1, src2); -+ } else { -+ if (is_min) { -+ fmin_s(dst, src1, src2); -+ } else { -+ fmax_s(dst, src1, src2); -+ } -+ // flt is just used for set fflag NV -+ flt_s(zr, src1, src2); -+ } -+ frflags(t0); -+ beqz(t0, Ldone); ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++// tmp = x28 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp) { ++ Label L_tail_loop, L_unroll, L_end; ++ mv(tmp, out); ++ mv(out, zr); ++ blez(len, L_end); ++ zero_extend(k, k, 32); ++ slliw(t0, offset, LogBytesPerInt); ++ add(offset, tmp, t0); ++ slliw(t0, len, LogBytesPerInt); ++ add(in, in, t0); ++ ++ const int unroll = 8; ++ li(tmp, unroll); ++ blt(len, tmp, L_tail_loop); ++ bind(L_unroll); ++ for (int i = 0; i < unroll; i++) { ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ } ++ subw(len, len, tmp); ++ bge(len, tmp, L_unroll); ++ ++ bind(L_tail_loop); ++ blez(len, L_end); ++ sub(in, in, BytesPerInt); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, BytesPerInt); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset, 0)); ++ srli(out, t0, 32); ++ subw(len, len, 1); ++ j(L_tail_loop); + -+ // Src1 or src2 must be NaN here. Set dst NaN. 
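(As a plain-C++ reference for the SWAR trick that compute_match_mask()/compute_index() above implement, the sketch below models the Latin-1 case. It is illustrative only: the multiply by 0x0101010101010101 replicating the pattern byte stands in for the shift/or sequence the assembly uses, and, as the comment above notes, only the lowest 0x80 in the resulting mask is a valid match index because borrow propagation can set spurious high bits in higher bytes.)

#include <cstdint>
#include <cstdio>
#include <cstring>

// Index (0..7) of the first byte of 'word' equal to 'ch', or -1 if none.
// Assumes a little-endian host (as RISC-V is); uses the GCC/Clang ctz builtin.
int first_match_latin1(uint64_t word, uint8_t ch) {
  const uint64_t mask1 = 0x0101010101010101ULL;    // same masks as string_indexof_char
  const uint64_t mask2 = 0x7f7f7f7f7f7f7f7fULL;
  const uint64_t x = word ^ (mask1 * ch);          // xorr(src, pattern, src)
  const uint64_t m = (x - mask1) & ~(x | mask2);   // sub / orr / notr / andr
  if (m == 0) {
    return -1;
  }
  return __builtin_ctzll(m) >> 3;                  // trailing zeros -> byte index
}

int main() {
  // Bytes "riscv64\0" packed little-endian; 'v' sits at index 4.
  const char s[8] = {'r', 'i', 's', 'c', 'v', '6', '4', '\0'};
  uint64_t w;
  std::memcpy(&w, s, sizeof(w));
  std::printf("%d\n", first_match_latin1(w, 'v'));   // prints 4
  return 0;
}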
-+ if (is_double) { -+ fadd_d(dst, src1, src2); -+ } else { -+ fadd_s(dst, src1, src2); -+ } -+ bind(Ldone); ++ bind(L_end); +} + -+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, -+ Register tmp4, Register tmp5, Register tmp6, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; // cnt2 only used in array length compare -+ Register elem_per_word = tmp6; -+ int log_elem_size = exact_log2(elem_size); -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); -+ -+ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); -+ mv(elem_per_word, wordSize / elem_size); -+ -+ BLOCK_COMMENT("arrays_equals {"); -+ -+ // if (a1 == a2), return true -+ oop_equal(a1, a2, SAME); -+ -+ mv(result, false); -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt2, cnt1, DONE); -+ beqz(cnt1, SAME); ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); ++} + -+ slli(tmp5, cnt1, 3 + log_elem_size); -+ sub(tmp5, zr, tmp5); -+ add(a1, a1, base_offset); -+ add(a2, a2, base_offset); -+ ld(tmp3, Address(a1, 0)); -+ ld(tmp4, Address(a2, 0)); -+ ble(cnt1, elem_per_word, SHORT); // short or same ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); ++} + -+ // Main 16 byte comparison loop with 2 exits -+ bind(NEXT_DWORD); { -+ ld(tmp1, Address(a1, wordSize)); -+ ld(tmp2, Address(a2, wordSize)); -+ sub(cnt1, cnt1, 2 * wordSize / elem_size); -+ blez(cnt1, TAIL); -+ bne(tmp3, tmp4, DONE); -+ ld(tmp3, Address(a1, 2 * wordSize)); -+ ld(tmp4, Address(a2, 2 * wordSize)); -+ add(a1, a1, 2 * wordSize); -+ add(a2, a2, 2 * wordSize); -+ ble(cnt1, elem_per_word, TAIL2); -+ } beq(tmp1, tmp2, NEXT_DWORD); -+ j(DONE); ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); ++} + -+ bind(TAIL); -+ xorr(tmp4, tmp3, tmp4); -+ xorr(tmp2, tmp1, tmp2); -+ sll(tmp2, tmp2, tmp5); -+ orr(tmp5, tmp4, tmp2); -+ j(IS_TMP5_ZR); ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} + -+ bind(TAIL2); -+ bne(tmp1, tmp2, DONE); ++/** ++ * Multiply 32 bit by 32 bit first loop. 
++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // long product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; + -+ bind(SHORT); -+ xorr(tmp4, tmp3, tmp4); -+ sll(tmp5, tmp4, tmp5); ++ Label L_first_loop, L_first_loop_exit; ++ blez(idx, L_first_loop_exit); + -+ bind(IS_TMP5_ZR); -+ bnez(tmp5, DONE); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); + -+ bind(SAME); -+ mv(result, true); -+ // That's it. -+ bind(DONE); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ subw(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ bgtz(idx, L_first_loop); + -+ BLOCK_COMMENT("} array_equals"); -+ postcond(pc() != badAddress); -+ return pc(); ++ bind(L_first_loop_exit); +} + -+// Compare Strings ++/** ++ * Multiply 64 bit by 64 bit first loop. ++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // + -+// For Strings we're passed the address of the first characters in a1 -+// and a2 and the length in cnt1. -+// elem_size is the element size in bytes: either 1 or 2. -+// There are two implementations. For arrays >= 8 bytes, all -+// comparisons (including the final one, which may overlap) are -+// performed 8 bytes at a time. For strings < 8 bytes, we compare a -+// halfword, then a short, and then a byte. ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; + -+void MacroAssembler::string_equals(Register a1, Register a2, -+ Register result, Register cnt1, int elem_size) -+{ -+ Label SAME, DONE, SHORT, NEXT_WORD; -+ Register tmp1 = t0; -+ Register tmp2 = t1; ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_one_x); + -+ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); -+ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian + -+ BLOCK_COMMENT("string_equals {"); ++ bind(L_first_loop); ++ subw(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ subw(idx, idx, 1); ++ bltz(idx, L_one_y); + -+ beqz(cnt1, SAME); -+ mv(result, false); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); + -+ // Check for short strings, i.e. smaller than wordSize. -+ sub(cnt1, cnt1, wordSize); -+ blez(cnt1, SHORT); ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); + -+ // Main 8 byte comparison loop. 
-+ bind(NEXT_WORD); { -+ ld(tmp1, Address(a1, 0)); -+ add(a1, a1, wordSize); -+ ld(tmp2, Address(a2, 0)); -+ add(a2, a2, wordSize); -+ sub(cnt1, cnt1, wordSize); -+ bne(tmp1, tmp2, DONE); -+ } bgtz(cnt1, NEXT_WORD); ++ subw(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); + -+ if (!AvoidUnalignedAccesses) { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when -+ // length == 4. -+ add(tmp1, a1, cnt1); -+ ld(tmp1, Address(tmp1, 0)); -+ add(tmp2, a2, cnt1); -+ ld(tmp2, Address(tmp2, 0)); -+ bne(tmp1, tmp2, DONE); -+ j(SAME); -+ } ++ j(L_first_loop); + -+ bind(SHORT); -+ ld(tmp1, Address(a1)); -+ ld(tmp2, Address(a2)); -+ xorr(tmp1, tmp1, tmp2); -+ neg(cnt1, cnt1); -+ slli(cnt1, cnt1, LogBitsPerByte); -+ sll(tmp1, tmp1, cnt1); -+ bnez(tmp1, DONE); ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); + -+ // Arrays are equal. -+ bind(SAME); -+ mv(result, true); ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); + -+ // That's it. -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals"); ++ bind(L_first_loop_exit); +} + -+typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); -+ -+// Compare strings. -+void MacroAssembler::string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, -+ Register tmp3, int ae) ++/** ++ * Multiply 128 bit by 128 bit. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) +{ -+ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, -+ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, -+ SHORT_LOOP_START, TAIL_CHECK, L; ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // + -+ const int STUB_THRESHOLD = 64 + 8; -+ bool isLL = ae == StrIntrinsicNode::LL; -+ bool isLU = ae == StrIntrinsicNode::LU; -+ bool isUL = ae == StrIntrinsicNode::UL; ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; + -+ bool str1_isL = isLL || isLU; -+ bool str2_isL = isLL || isUL; ++ srliw(jdx, idx, 2); + -+ // for L strings, 1 byte for 1 character -+ // for U strings, 2 bytes for 1 character -+ int str1_chr_size = str1_isL ? 1 : 2; -+ int str2_chr_size = str2_isL ? 1 : 2; -+ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ bind(L_third_loop); + -+ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn str2_load_chr = str2_isL ? 
(load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ subw(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ subw(idx, idx, 4); + -+ BLOCK_COMMENT("string_compare {"); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); + -+ // Bizzarely, the counts are passed in bytes, regardless of whether they -+ // are L or U strings, however the result is always in characters. -+ if (!str1_isL) { -+ sraiw(cnt1, cnt1, 1); -+ } -+ if (!str2_isL) { -+ sraiw(cnt2, cnt2, 1); -+ } ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); + -+ // Compute the minimum of the string lengths and save the difference in result. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); + -+ // A very short string -+ mv(t0, minCharsInWord); -+ ble(cnt2, t0, SHORT_STRING); ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); + -+ // Compare longwords -+ // load first parts of strings and finish initialization while loading -+ { -+ if (str1_isL == str2_isL) { // LL or UU -+ // check if str1 and str2 are same string -+ beq(str1, str2, DONE); -+ // load 8 bytes once to compare -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ sub(cnt2, cnt2, minCharsInWord); -+ beqz(cnt2, TAIL_CHECK); -+ // convert cnt2 from characters to bytes -+ if(!str1_isL) { -+ slli(cnt2, cnt2, 1); -+ } -+ add(str2, str2, cnt2); -+ add(str1, str1, cnt2); -+ sub(cnt2, zr, cnt2); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ add(str1, str1, cnt2); -+ sub(cnt1, zr, cnt2); -+ slli(cnt2, cnt2, 1); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 4); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ mv(t0, STUB_THRESHOLD); -+ bge(cnt2, t0, STUB); -+ addi(cnt2, cnt2, -4); -+ slli(t0, cnt2, 1); -+ sub(cnt1, zr, t0); -+ add(str1, str1, t0); -+ add(str2, str2, cnt2); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ sub(cnt2, zr, cnt2); -+ addi(cnt1, cnt1, 8); -+ } -+ addi(cnt2, cnt2, isUL ? 4 : 8); -+ bgez(cnt2, TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ // main loop -+ bind(NEXT_WORD); -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt2, cnt2, 8); -+ } else if (isLU) { // LU case -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ addi(cnt1, cnt1, 4); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ addi(cnt2, cnt2, 8); -+ } else { // UL case -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ addi(cnt1, cnt1, 8); -+ addi(cnt2, cnt2, 4); -+ } -+ bgez(cnt2, TAIL); ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); + -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, NEXT_WORD); -+ j(DIFFERENCE); -+ bind(TAIL); -+ xorr(tmp3, tmp1, tmp2); -+ bnez(tmp3, DIFFERENCE); -+ // Last longword. -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. 
Load bytes from byte-aligned address, -+ // which may contain invalid bytes when remaining bytes is -+ // less than 4(UL/LU) or 8 (LL/UU). -+ // Invalid bytes should be removed before comparison. -+ if (str1_isL == str2_isL) { // LL or UU -+ add(t0, str1, cnt2); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ } else if (isLU) { // LU -+ add(t0, str1, cnt1); -+ lwu(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ ld(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL -+ add(t0, str1, cnt1); -+ ld(tmp1, Address(t0)); -+ add(t0, str2, cnt2); -+ lwu(tmp2, Address(t0)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes -+ } -+ // remove invalid bytes -+ slli(t0, cnt2, LogBitsPerByte); -+ sll(tmp1, tmp1, t0); -+ sll(tmp2, tmp2, t0); -+ } else { -+ // Last longword. In the case where length == 4 we compare the -+ // same longword twice, but that's still faster than another -+ // conditional branch. -+ if (str1_isL == str2_isL) { // LL or UU -+ ld(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ } else if (isLU) { // LU case -+ lwu(tmp1, Address(str1)); -+ ld(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp1); -+ mv(tmp1, tmp3); -+ } else { // UL case -+ ld(tmp1, Address(str1)); -+ lwu(tmp2, Address(str2)); -+ inflate_lo32(tmp3, tmp2); -+ mv(tmp2, tmp3); -+ } -+ } -+ bind(TAIL_CHECK); -+ xorr(tmp3, tmp1, tmp2); -+ beqz(tmp3, DONE); ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); + -+ // Find the first different characters in the longwords and -+ // compute their difference. -+ bind(DIFFERENCE); -+ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb -+ srl(tmp1, tmp1, result); -+ srl(tmp2, tmp2, result); -+ if (isLL) { -+ andi(tmp1, tmp1, 0xFF); -+ andi(tmp2, tmp2, 0xFF); -+ } else { -+ andi(tmp1, tmp1, 0xFFFF); -+ andi(tmp2, tmp2, 0xFFFF); -+ } -+ sub(result, tmp1, tmp2); -+ j(DONE); -+ } ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); ++ adc(carry, carry, zr, carry2); + -+ bind(STUB); -+ RuntimeAddress stub = NULL; -+ switch (ae) { -+ case StrIntrinsicNode::LL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); -+ break; -+ case StrIntrinsicNode::UU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); -+ break; -+ case StrIntrinsicNode::LU: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); -+ break; -+ case StrIntrinsicNode::UL: -+ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); -+ trampoline_call(stub); -+ j(DONE); ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); + -+ bind(SHORT_STRING); -+ // Is the minimum length zero? 
-+ beqz(cnt2, DONE); -+ // arrange code to do most branches while loading and loading next characters -+ // while comparing previous -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ j(SHORT_LOOP_START); -+ bind(SHORT_LOOP); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST); -+ bind(SHORT_LOOP_START); -+ (this->*str1_load_chr)(tmp2, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(t0, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bne(tmp1, cnt1, SHORT_LOOP_TAIL); -+ addi(cnt2, cnt2, -1); -+ beqz(cnt2, SHORT_LAST2); -+ (this->*str1_load_chr)(tmp1, Address(str1), t0); -+ addi(str1, str1, str1_chr_size); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ beq(tmp2, t0, SHORT_LOOP); -+ sub(result, tmp2, t0); -+ j(DONE); -+ bind(SHORT_LOOP_TAIL); -+ sub(result, tmp1, cnt1); -+ j(DONE); -+ bind(SHORT_LAST2); -+ beq(tmp2, t0, DONE); -+ sub(result, tmp2, t0); ++ j(L_third_loop); + -+ j(DONE); -+ bind(SHORT_LAST_INIT); -+ (this->*str2_load_chr)(cnt1, Address(str2), t0); -+ addi(str2, str2, str2_chr_size); -+ bind(SHORT_LAST); -+ beq(tmp1, cnt1, DONE); -+ sub(result, tmp1, cnt1); ++ bind(L_third_loop_exit); + -+ bind(DONE); ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); + -+ BLOCK_COMMENT("} string_compare"); -+} ++ Label L_check_1; ++ subw(idx, idx, 2); ++ bltz(idx, L_check_1); + -+// short string -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL) -+{ -+ Register ch1 = t0; -+ Register index = t1; ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); + -+ BLOCK_COMMENT("string_indexof_char_short {"); ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); + -+ Label LOOP, LOOP1, LOOP4, LOOP8; -+ Label MATCH, MATCH1, MATCH2, MATCH3, -+ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); + -+ mv(result, -1); -+ mv(index, zr); ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); + -+ bind(LOOP); -+ addi(t0, index, 8); -+ ble(t0, cnt1, LOOP8); -+ addi(t0, index, 4); -+ ble(t0, cnt1, LOOP4); -+ j(LOOP1); ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); + -+ bind(LOOP8); -+ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); -+ beq(ch, ch1, MATCH4); -+ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); -+ beq(ch, ch1, MATCH5); -+ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); -+ beq(ch, ch1, MATCH6); -+ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); -+ beq(ch, ch1, MATCH7); -+ addi(index, index, 8); -+ addi(str1, str1, isL ? 8 : 16); -+ blt(index, cnt1, LOOP); -+ j(NOMATCH); ++ bind(L_check_1); + -+ bind(LOOP4); -+ isL ? 
lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); -+ beq(ch, ch1, MATCH); -+ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); -+ beq(ch, ch1, MATCH1); -+ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); -+ beq(ch, ch1, MATCH2); -+ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); -+ beq(ch, ch1, MATCH3); -+ addi(index, index, 4); -+ addi(str1, str1, isL ? 4 : 8); -+ bge(index, cnt1, NOMATCH); ++ andi(idx, idx, 0x1); ++ subw(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); + -+ bind(LOOP1); -+ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); -+ beq(ch, ch1, MATCH); -+ addi(index, index, 1); -+ addi(str1, str1, isL ? 1 : 2); -+ blt(index, cnt1, LOOP1); -+ j(NOMATCH); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); + -+ bind(MATCH1); -+ addi(index, index, 1); -+ j(MATCH); ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0); + -+ bind(MATCH2); -+ addi(index, index, 2); -+ j(MATCH); ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); + -+ bind(MATCH3); -+ addi(index, index, 3); -+ j(MATCH); ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); + -+ bind(MATCH4); -+ addi(index, index, 4); -+ j(MATCH); ++ bind(L_post_third_loop_done); ++} + -+ bind(MATCH5); -+ addi(index, index, 5); -+ j(MATCH); ++/** ++ * Code for BigInteger::multiplyToLen() intrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + -+ bind(MATCH6); -+ addi(index, index, 6); -+ j(MATCH); ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; + -+ bind(MATCH7); -+ addi(index, index, 7); ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register + -+ bind(MATCH); -+ mv(result, index); -+ bind(NOMATCH); -+ BLOCK_COMMENT("} string_indexof_char_short"); -+} ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; + -+// StringUTF16.indexOfChar -+// StringLatin1.indexOfChar -+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL) -+{ -+ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; -+ Register ch1 = t0; -+ Register orig_cnt = t1; -+ Register mask1 = tmp3; -+ Register mask2 = tmp2; -+ Register match_mask = tmp1; -+ Register trailing_char = tmp4; -+ Register unaligned_elems = tmp4; ++ Label L_multiply_64_x_64_loop, L_done; + -+ BLOCK_COMMENT("string_indexof_char {"); -+ beqz(cnt1, NOMATCH); ++ subw(xstart, xlen, 1); ++ bltz(xstart, L_done); + -+ addi(t0, cnt1, isL ? 
-32 : -16); -+ bgtz(t0, DO_LONG); -+ string_indexof_char_short(str1, cnt1, ch, result, isL); -+ j(DONE); ++ const Register jdx = tmp1; + -+ bind(DO_LONG); -+ mv(orig_cnt, cnt1); + if (AvoidUnalignedAccesses) { -+ Label ALIGNED; -+ andi(unaligned_elems, str1, 0x7); -+ beqz(unaligned_elems, ALIGNED); -+ sub(unaligned_elems, unaligned_elems, 8); -+ neg(unaligned_elems, unaligned_elems); -+ if (!isL) { -+ srli(unaligned_elems, unaligned_elems, 1); -+ } -+ // do unaligned part per element -+ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); -+ bgez(result, DONE); -+ mv(orig_cnt, cnt1); -+ sub(cnt1, cnt1, unaligned_elems); -+ bind(ALIGNED); -+ } ++ // Check if x and y are both 8-byte aligned. ++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_x_64_loop); ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_unaligned; ++ bind(L_second_loop_unaligned); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ subw(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ blez(jdx, L_third_loop_exit); ++ ++ bind(L_third_loop); ++ subw(jdx, jdx, 1); ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ bgtz(jdx, L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // duplicate ch -+ if (isL) { -+ slli(ch1, ch, 8); -+ orr(ch, ch1, ch); ++ j(L_second_loop_unaligned); + } -+ slli(ch1, ch, 16); -+ orr(ch, ch1, ch); -+ slli(ch1, ch, 32); -+ orr(ch, ch1, ch); + -+ if (!isL) { -+ slli(cnt1, cnt1, 1); -+ } ++ bind(L_multiply_64_x_64_loop); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); + -+ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); -+ mv(mask2, isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ Label L_second_loop_aligned; ++ beqz(kdx, L_second_loop_aligned); + -+ bind(CH1_LOOP); -+ ld(ch1, Address(str1)); -+ addi(str1, str1, 8); -+ addi(cnt1, cnt1, -8); -+ compute_match_mask(ch1, ch, match_mask, mask1, mask2); -+ bnez(match_mask, HIT); -+ bgtz(cnt1, CH1_LOOP); -+ j(NOMATCH); ++ Label L_carry; ++ subw(kdx, kdx, 1); ++ beqz(kdx, L_carry); + -+ bind(HIT); -+ ctzc_bit(trailing_char, match_mask, isL, ch1, result); -+ srli(trailing_char, trailing_char, 3); -+ addi(cnt1, cnt1, 8); -+ ble(cnt1, trailing_char, NOMATCH); -+ // match case -+ if (!isL) { -+ srli(cnt1, cnt1, 1); -+ srli(trailing_char, trailing_char, 1); -+ } ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ subw(kdx, kdx, 1); + -+ sub(result, orig_cnt, cnt1); -+ add(result, result, trailing_char); -+ j(DONE); ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ bind(NOMATCH); -+ mv(result, -1); ++ // Second and third (nested) loops. 
++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof_char"); -+} ++ bind(L_second_loop_aligned); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 + -+// Search for needle in haystack and return index or -1 -+// x10: result -+// x11: haystack -+// x12: haystack_len -+// x13: needle -+// x14: needle_len -+void MacroAssembler::string_indexof(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae) -+{ -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); + -+ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register nlen_tmp = tmp1; // needle len tmp -+ Register hlen_tmp = tmp2; // haystack len tmp -+ Register result_tmp = tmp4; -+ -+ bool isLL = ae == StrIntrinsicNode::LL; -+ -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ -+ BLOCK_COMMENT("string_indexof {"); -+ -+ // Note, inline_string_indexOf() generates checks: -+ // if (pattern.count > src.count) return -1; -+ // if (pattern.count == 0) return 0; -+ -+ // We have two strings, a source string in haystack, haystack_len and a pattern string -+ // in needle, needle_len. Find the first occurence of pattern in source or return -1. -+ -+ // For larger pattern and source we use a simplified Boyer Moore algorithm. -+ // With a small pattern and source we use linear scan. -+ -+ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. -+ sub(result_tmp, haystack_len, needle_len); -+ // needle_len < 8, use linear scan -+ sub(t0, needle_len, 8); -+ bltz(t0, LINEARSEARCH); -+ // needle_len >= 256, use linear scan -+ sub(t0, needle_len, 256); -+ bgez(t0, LINEARSTUB); -+ // needle_len >= haystack_len/4, use linear scan -+ srli(t0, haystack_len, 2); -+ bge(needle_len, t0, LINEARSTUB); -+ -+ // Boyer-Moore-Horspool introduction: -+ // The Boyer Moore alogorithm is based on the description here:- -+ // -+ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm -+ // -+ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule -+ // and the 'Good Suffix' rule. -+ // -+ // These rules are essentially heuristics for how far we can shift the -+ // pattern along the search string. 
-+ // -+ // The implementation here uses the 'Bad Character' rule only because of the -+ // complexity of initialisation for the 'Good Suffix' rule. -+ // -+ // This is also known as the Boyer-Moore-Horspool algorithm: -+ // -+ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm -+ // -+ // #define ASIZE 256 -+ // -+ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { -+ // int i, j; -+ // unsigned c; -+ // unsigned char bc[ASIZE]; -+ // -+ // /* Preprocessing */ -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ // -+ // /* Searching */ -+ // j = 0; -+ // while (j <= n - m) { -+ // c = src[i+j]; -+ // if (pattern[m-1] == c) -+ // int k; -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // if (k < 0) return j; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 -+ // // LL case: (c< 256) always true. Remove branch -+ // j += bc[pattern[j+m-1]]; -+ // #endif -+ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF -+ // // UU case: need if (c if not. -+ // if (c < ASIZE) -+ // j += bc[pattern[j+m-1]]; -+ // else -+ // j += m -+ // #endif -+ // } -+ // return -1; -+ // } -+ -+ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result -+ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, -+ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ subw(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); + -+ Register haystack_end = haystack_len; -+ Register skipch = tmp2; ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian + -+ // pattern length is >=8, so, we can read at least 1 register for cases when -+ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for -+ // UL case. We'll re-read last character in inner pre-loop code to have -+ // single outer pre-loop load -+ const int firstStep = isLL ? 
7 : 3; ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); + -+ const int ASIZE = 256; -+ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); + -+ sub(sp, sp, ASIZE); ++ addiw(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); + -+ // init BC offset table with default value: needle_len -+ slli(t0, needle_len, 8); -+ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] -+ slli(tmp1, t0, 16); -+ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] -+ slli(tmp1, t0, 32); -+ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ subw(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); + -+ mv(ch1, sp); // ch1 is t0 -+ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_aligned); + -+ bind(BM_INIT_LOOP); -+ // for (i = 0; i < ASIZE; ++i) -+ // bc[i] = m; -+ for (int i = 0; i < 4; i++) { -+ sd(tmp5, Address(ch1, i * wordSize)); -+ } -+ add(ch1, ch1, 32); -+ sub(tmp6, tmp6, 4); -+ bgtz(tmp6, BM_INIT_LOOP); ++ // Next infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); + -+ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern -+ Register orig_haystack = tmp5; -+ mv(orig_haystack, haystack); -+ // result_tmp = tmp4 -+ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); -+ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 -+ mv(tmp3, needle); ++ bind(L_done); ++} ++#endif + -+ // for (i = 0; i < m - 1; ) { -+ // c = pattern[i]; -+ // ++i; -+ // // c < 256 for Latin1 string, so, no need for branch -+ // #ifdef PATTERN_STRING_IS_LATIN1 -+ // bc[c] = m - i; -+ // #else -+ // if (c < ASIZE) bc[c] = m - i; -+ // #endif -+ // } -+ bind(BCLOOP); -+ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); -+ add(tmp3, tmp3, needle_chr_size); -+ if (!needle_isL) { -+ // ae == StrIntrinsicNode::UU -+ mv(tmp6, ASIZE); -+ bgeu(ch1, tmp6, BCSKIP); ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; + } -+ add(tmp4, sp, ch1); -+ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 
8 : 16; ++ li(Rd, -step); ++ mv(tmp2, Rs); + -+ bind(BCSKIP); -+ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 -+ bgtz(ch2, BCLOOP); ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); ++} + -+ // tmp6: pattern end, address after needle -+ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); -+ if (needle_isL == haystack_isL) { -+ // load last 8 bytes (8LL/4UU symbols) -+ ld(tmp6, Address(tmp6, -wordSize)); -+ } else { -+ // UL: from UTF-16(source) search Latin1(pattern) -+ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) -+ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d -+ // We'll have to wait until load completed, but it's still faster than per-character loads+checks -+ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a -+ slli(ch2, tmp6, XLEN - 24); -+ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b -+ slli(ch1, tmp6, XLEN - 16); -+ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c -+ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d -+ slli(ch2, ch2, 16); -+ orr(ch2, ch2, ch1); // 0x00000b0c -+ slli(result, tmp3, 48); // use result as temp register -+ orr(tmp6, tmp6, result); // 0x0a00000d -+ slli(result, ch2, 16); -+ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ if (i) { ++ slli(tmp2, tmp2, i * 8); ++ } ++ orr(Rd, Rd, tmp2); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ // i = m - 1; -+ // skipch = j + i; -+ // if (skipch == pattern[m - 1] -+ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); -+ // else -+ // move j with bad char offset table -+ bind(BMLOOPSTR2); -+ // compare pattern to source string backward -+ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); -+ (this->*haystack_load_1chr)(skipch, Address(result), noreg); -+ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 -+ if (needle_isL == haystack_isL) { -+ // re-init tmp3. It's for free because it's executed in parallel with -+ // load above. Alternative is to initialize it before loop, but it'll -+ // affect performance on in-order systems with 2 or more ld/st pipelines -+ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] -+ } -+ if (!isLL) { // UU/UL case -+ slli(ch2, nlen_tmp, 1); // offsets in bytes -+ } -+ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char -+ add(result, haystack, isLL ? nlen_tmp : ch2); -+ ld(ch2, Address(result)); // load 8 bytes from source string -+ mv(ch1, tmp6); -+ if (isLL) { -+ j(BMLOOPSTR1_AFTER_LOAD); -+ } else { -+ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. 
cnt1 >= 8 -+ j(BMLOOPSTR1_CMP); ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ li(tmp1, 0xFF00000000); ++ mv(Rd, zr); ++ for (int i = 0; i <= 3; i++) ++ { ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ srli(Rd, Rd, 8); ++ if (i != 3) { ++ slli(tmp1, tmp1, 8); ++ } + } ++} + -+ bind(BMLOOPSTR1); -+ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); -+ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; + -+ bind(BMLOOPSTR1_AFTER_LOAD); -+ sub(nlen_tmp, nlen_tmp, 1); -+ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. + -+ bind(BMLOOPSTR1_CMP); -+ beq(ch1, ch2, BMLOOPSTR1); ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. ++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); + -+ bind(BMSKIP); -+ if (!isLL) { -+ // if we've met UTF symbol while searching Latin1 pattern, then we can -+ // skip needle_len symbols -+ if (needle_isL != haystack_isL) { -+ mv(result_tmp, needle_len); ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; ++ } + } else { -+ mv(result_tmp, 1); ++ jal(zero_blocks); + } -+ mv(t0, ASIZE); -+ bgeu(skipch, t0, BMADV); + } -+ add(result_tmp, sp, skipch); -+ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); ++ } ++ bind(l); ++ } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); ++} + -+ bind(BMADV); -+ sub(nlen_tmp, needle_len, 1); -+ // move haystack after bad char skip offset -+ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); -+ ble(haystack, haystack_end, BMLOOPSTR2); -+ add(sp, sp, ASIZE); -+ j(NOMATCH); ++#define SmallArraySize (18 * BytesPerLong) + -+ bind(BMLOOPSTR1_LASTCMP); -+ bne(ch1, ch2, BMSKIP); ++// base: Address of a buffer to be zeroed, 8 bytes aligned. 
++// cnt: Immediate count in HeapWords. ++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); + -+ bind(BMMATCH); -+ sub(result, haystack, orig_haystack); -+ if (!haystack_isL) { -+ srli(result, result, 1); -+ } -+ add(sp, sp, ASIZE); -+ j(DONE); ++ BLOCK_COMMENT("zero_words {"); + -+ bind(LINEARSTUB); -+ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm -+ bltz(t0, LINEARSEARCH); -+ mv(result, zr); -+ RuntimeAddress stub = NULL; -+ if (isLL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); -+ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); -+ } else if (needle_isL) { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); -+ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + } else { -+ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); -+ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); -+ } -+ trampoline_call(stub); -+ j(DONE); -+ -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } + -+ bind(LINEARSEARCH); -+ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ li(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); ++ } + -+ bind(DONE); -+ BLOCK_COMMENT("} string_indexof"); ++ BLOCK_COMMENT("} zero_words"); +} + -+// string_indexof -+// result: x10 -+// src: x11 -+// src_count: x12 -+// pattern: x13 -+// pattern_count: x14 or 1/2/3/4 -+void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae) ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) +{ -+ // Note: -+ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant -+ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 -+ assert(needle_con_cnt <= 4, "Invalid needle constant count"); -+ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... 
++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end + -+ Register ch1 = t0; -+ Register ch2 = t1; -+ Register hlen_neg = haystack_len, nlen_neg = needle_len; -+ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ assert_different_registers(base, cnt, value, t0, t1); + -+ bool isLL = ae == StrIntrinsicNode::LL; ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll + -+ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; -+ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; -+ int needle_chr_shift = needle_isL ? 0 : 1; -+ int haystack_chr_shift = haystack_isL ? 0 : 1; -+ int needle_chr_size = needle_isL ? 1 : 2; -+ int haystack_chr_size = haystack_isL ? 1 : 2; ++ beqz(cnt, fini); + -+ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : -+ (load_chr_insn)&MacroAssembler::lhu; -+ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; -+ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. ++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); + -+ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); ++ } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); + -+ Register first = tmp3; ++ bind(fini); ++} + -+ if (needle_con_cnt == -1) { -+ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} + -+ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); -+ bltz(t0, DOSHORT); ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) + -+ (this->*needle_load_1chr)(first, Address(needle), noreg); -+ slli(t0, needle_len, needle_chr_shift); -+ add(needle, needle, t0); -+ neg(nlen_neg, t0); -+ slli(t0, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, t0); -+ neg(hlen_neg, t0); ++#undef FCVT_SAFE + -+ bind(FIRST_LOOP); -+ add(t0, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); -+ beq(first, ch2, STR1_LOOP); ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
*/ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ ++} + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++FCMP(float, s); ++FCMP(double, d); + -+ bind(STR1_LOOP); -+ add(nlen_tmp, nlen_neg, needle_chr_size); -+ add(hlen_tmp, hlen_neg, haystack_chr_size); -+ bgez(nlen_tmp, MATCH); ++#undef FCMP + -+ bind(STR1_NEXT); -+ add(ch1, needle, nlen_tmp); -+ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ add(nlen_tmp, nlen_tmp, needle_chr_size); -+ add(hlen_tmp, hlen_tmp, haystack_chr_size); -+ bltz(nlen_tmp, STR1_NEXT); -+ j(MATCH); ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) { ++ assert_different_registers(addr, len, tmp, t0, t1); + -+ bind(DOSHORT); -+ if (needle_isL == haystack_isL) { -+ sub(t0, needle_len, 2); -+ bltz(t0, DO1); -+ bgtz(t0, DO3); -+ } ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); + } ++#endif // ASSERT + -+ if (needle_con_cnt == 4) { -+ Label CH1_LOOP; -+ (this->*load_4chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 4); -+ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT + -+ bind(CH1_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_4chr)(ch2, Address(ch2), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ } ++ Label loop; ++ Label entry; + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { -+ Label CH1_LOOP; -+ BLOCK_COMMENT("string_indexof DO2 {"); -+ bind(DO2); -+ (this->*load_2chr)(ch1, Address(needle), noreg); -+ if (needle_con_cnt == 2) { -+ sub(result_tmp, haystack_len, 2); -+ } -+ slli(tmp3, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... 
++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } + -+ bind(CH1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, CH1_LOOP); -+ j(NOMATCH); -+ BLOCK_COMMENT("} string_indexof DO2"); -+ } ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll + -+ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { -+ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; -+ BLOCK_COMMENT("string_indexof DO3 {"); ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp always points to the end of the region we're about to zero ++ shadd(tmp, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp, tmp, unroll * wordSize); ++ bnez(len, loop); ++} + -+ bind(DO3); -+ (this->*load_2chr)(first, Address(needle), noreg); -+ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); -+ if (needle_con_cnt == 3) { -+ sub(result_tmp, haystack_len, 3); ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; + } -+ slli(hlen_tmp, result_tmp, haystack_chr_shift); -+ add(haystack, haystack, hlen_tmp); -+ neg(hlen_neg, hlen_tmp); ++ } + -+ bind(FIRST_LOOP); -+ add(ch2, haystack, hlen_neg); -+ (this->*load_2chr)(ch2, Address(ch2), noreg); -+ beq(first, ch2, STR1_LOOP); ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} + -+ bind(STR2_NEXT); -+ add(hlen_neg, hlen_neg, haystack_chr_size); -+ blez(hlen_neg, FIRST_LOOP); -+ j(NOMATCH); ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; ++ } + -+ bind(STR1_LOOP); -+ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); -+ add(ch2, haystack, hlen_tmp); -+ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); -+ bne(ch1, ch2, STR2_NEXT); -+ j(MATCH); -+ BLOCK_COMMENT("} string_indexof DO3"); ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; + } + -+ if (needle_con_cnt == -1 || needle_con_cnt == 1) { -+ Label DO1_LOOP; ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } ++} + -+ BLOCK_COMMENT("string_indexof DO1 {"); -+ bind(DO1); -+ (this->*needle_load_1chr)(ch1, Address(needle), noreg); -+ sub(result_tmp, haystack_len, 1); -+ mv(tmp3, result_tmp); -+ if (haystack_chr_shift) { -+ slli(tmp3, result_tmp, haystack_chr_shift); ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; + } -+ add(haystack, haystack, tmp3); -+ neg(hlen_neg, tmp3); -+ -+ bind(DO1_LOOP); -+ add(tmp3, haystack, hlen_neg); -+ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); -+ beq(ch1, ch2, MATCH); -+ add(hlen_neg, hlen_neg, haystack_chr_size); 
-+ blez(hlen_neg, DO1_LOOP); -+ BLOCK_COMMENT("} string_indexof DO1"); + } + -+ bind(NOMATCH); -+ mv(result, -1); -+ j(DONE); -+ -+ bind(MATCH); -+ srai(t0, hlen_neg, haystack_chr_shift); -+ add(result, result_tmp, t0); -+ -+ bind(DONE); ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } +} + -+void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { -+ Label loop; -+ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; -+ -+ bind(loop); -+ vsetvli(tmp1, cnt, sew, Assembler::m2); -+ vlex_v(vr1, a1, sew); -+ vlex_v(vr2, a2, sew); -+ vmsne_vv(vrs, vr1, vr2); -+ vfirst_m(tmp2, vrs); -+ bgez(tmp2, DONE); -+ sub(cnt, cnt, tmp1); -+ if (!islatin) { -+ slli(tmp1, tmp1, 1); // get byte counts -+ } -+ add(a1, a1, tmp1); -+ add(a2, a2, tmp1); -+ bnez(cnt, loop); ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; ++ } ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; ++ } + -+ mv(result, true); ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); +} + -+void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ -+ BLOCK_COMMENT("string_equals_v {"); ++#ifdef COMPILER2 ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; + -+ mv(result, false); ++ BLOCK_COMMENT("string_indexof_char_short {"); + -+ if (elem_size == 2) { -+ srli(cnt, cnt, 1); -+ } ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; + -+ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ mv(result, -1); ++ mv(index, zr); + -+ bind(DONE); -+ BLOCK_COMMENT("} string_equals_v"); -+} ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); + -+// used by C2 ClearArray patterns. -+// base: Address of a buffer to be zeroed -+// cnt: Count in HeapWords -+// -+// base, cnt, v0, v1 and t0 are clobbered. -+void MacroAssembler::clear_array_v(Register base, Register cnt) { -+ Label loop; ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? 
lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); + -+ // making zero words -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vxor_vv(v0, v0, v0); ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); + -+ bind(loop); -+ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); -+ vse64_v(v0, base); -+ sub(cnt, cnt, t0); -+ shadd(base, t0, base, t0, 3); -+ bnez(cnt, loop); -+} ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); + -+void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, -+ Register cnt1, int elem_size) { -+ Label DONE; -+ Register tmp1 = t0; -+ Register tmp2 = t1; -+ Register cnt2 = tmp2; -+ int length_offset = arrayOopDesc::length_offset_in_bytes(); -+ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); + -+ BLOCK_COMMENT("arrays_equals_v {"); ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); + -+ // if (a1 == a2), return true -+ mv(result, true); -+ oop_equal(a1, a2, DONE); ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); + -+ mv(result, false); -+ // if a1 == null or a2 == null, return false -+ beqz(a1, DONE); -+ beqz(a2, DONE); -+ // if (a1.length != a2.length), return false -+ lwu(cnt1, Address(a1, length_offset)); -+ lwu(cnt2, Address(a2, length_offset)); -+ bne(cnt1, cnt2, DONE); ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); + -+ la(a1, Address(a1, base_offset)); -+ la(a2, Address(a2, base_offset)); ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); + -+ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); + -+ bind(DONE); ++ bind(MATCH7); ++ addi(index, index, 7); + -+ BLOCK_COMMENT("} arrays_equals_v"); ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); +} + -+void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm) { -+ Label DIFFERENCE, DONE, L, loop; -+ bool encLL = encForm == StrIntrinsicNode::LL; -+ bool encLU = encForm == StrIntrinsicNode::LU; -+ bool encUL = encForm == StrIntrinsicNode::UL; ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; + -+ bool str1_isL = 
encLL || encLU; -+ bool str2_isL = encLL || encUL; ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); + -+ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); + -+ BLOCK_COMMENT("string_compare {"); ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } + -+ // for Lating strings, 1 byte for 1 character -+ // for UTF16 strings, 2 bytes for 1 character -+ if (!str1_isL) -+ sraiw(cnt1, cnt1, 1); -+ if (!str2_isL) -+ sraiw(cnt2, cnt2, 1); ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); + -+ // if str1 == str2, return the difference -+ // save the minimum of the string lengths in cnt2. -+ sub(result, cnt1, cnt2); -+ bgt(cnt1, cnt2, L); -+ mv(cnt2, cnt1); -+ bind(L); ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } + -+ if (str1_isL == str2_isL) { // LL or UU -+ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); -+ j(DONE); -+ } else { // LU or UL -+ Register strL = encLU ? str1 : str2; -+ Register strU = encLU ? str2 : str1; -+ VectorRegister vstr1 = encLU ? v4 : v0; -+ VectorRegister vstr2 = encLU ? v0 : v4; ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ mv(mask1, isL ? mask0101 : mask0001); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ mv(mask2, isL ? mask7f7f : mask7fff); + -+ bind(loop); -+ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); -+ vle8_v(vstr1, strL); -+ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); -+ vzext_vf2(vstr2, vstr1); -+ vle16_v(vstr1, strU); -+ vmsne_vv(v0, vstr2, vstr1); -+ vfirst_m(tmp2, v0); -+ bgez(tmp2, DIFFERENCE); -+ sub(cnt2, cnt2, tmp1); -+ add(strL, strL, tmp1); -+ shadd(strU, tmp1, strU, tmp1, 1); -+ bnez(cnt2, loop); -+ j(DONE); -+ } -+ bind(DIFFERENCE); -+ slli(tmp1, tmp2, 1); -+ add(str1, str1, str1_isL ? tmp2 : tmp1); -+ add(str2, str2, str2_isL ? tmp2 : tmp1); -+ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); -+ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); -+ sub(result, tmp1, tmp2); ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); + -+ bind(DONE); -+} ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } + -+address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { -+ Label loop; -+ assert_different_registers(src, dst, len, tmp, t0); ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); + -+ BLOCK_COMMENT("byte_array_inflate_v {"); -+ bind(loop); -+ vsetvli(tmp, len, Assembler::e8, Assembler::m2); -+ vle8_v(v2, src); -+ vsetvli(t0, len, Assembler::e16, Assembler::m4); -+ vzext_vf2(v0, v2); -+ vse16_v(v0, dst); -+ sub(len, len, tmp); -+ add(src, src, tmp); -+ shadd(dst, tmp, dst, tmp, 1); -+ bnez(len, loop); -+ BLOCK_COMMENT("} byte_array_inflate_v"); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ bind(NOMATCH); ++ mv(result, -1); + -+// Compress char[] array to byte[]. -+// result: the array length if every element in array can be encoded; 0, otherwise. -+void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label done; -+ encode_iso_array_v(src, dst, len, result, tmp); -+ beqz(len, done); -+ mv(result, zr); -+ bind(done); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); +} + -+// result: the number of elements had been encoded. 
-+void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { -+ Label loop, DIFFERENCE, DONE; ++typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp); + -+ BLOCK_COMMENT("encode_iso_array_v {"); -+ mv(result, 0); -+ -+ bind(loop); -+ mv(tmp, 0xff); -+ vsetvli(t0, len, Assembler::e16, Assembler::m2); -+ vle16_v(v2, src); -+ // if element > 0xff, stop -+ vmsgtu_vx(v1, v2, tmp); -+ vfirst_m(tmp, v1); -+ vmsbf_m(v0, v1); -+ // compress char to byte -+ vsetvli(t0, len, Assembler::e8); -+ vncvt_x_x_w(v1, v2, Assembler::v0_t); -+ vse8_v(v1, dst, Assembler::v0_t); -+ -+ bgez(tmp, DIFFERENCE); -+ add(result, result, t0); -+ add(dst, dst, t0); -+ sub(len, len, t0); -+ shadd(src, t0, src, t0, 1); -+ bnez(len, loop); -+ j(DONE); ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ bind(DIFFERENCE); -+ add(result, result, tmp); ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; + -+ bind(DONE); -+ BLOCK_COMMENT("} encode_iso_array_v"); -+} ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; + -+address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { -+ Label loop, DONE; ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ mv(result, true); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; + -+ bind(loop); -+ vsetvli(t0, len, Assembler::e8, Assembler::m4); -+ vle8_v(v0, ary); -+ // if element highest bit is set, return true -+ vmslt_vx(v0, v0, zr); -+ vfirst_m(tmp, v0); -+ bgez(tmp, DONE); -+ -+ sub(len, len, t0); -+ add(ary, ary, t0); -+ bnez(len, loop); -+ mv(result, false); ++ BLOCK_COMMENT("string_indexof {"); + -+ bind(DONE); -+ postcond(pc() != badAddress); -+ return pc(); -+} ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; + -+// string indexof -+// compute index by trailing zeros -+void MacroAssembler::compute_index(Register haystack, Register trailing_zero, -+ Register match_mask, Register result, -+ Register ch2, Register tmp, -+ bool haystack_isL) -+{ -+ int haystack_chr_shift = haystack_isL ? 
0 : 1; -+ srl(match_mask, match_mask, trailing_zero); -+ srli(match_mask, match_mask, 1); -+ srli(tmp, trailing_zero, LogBitsPerByte); -+ if (!haystack_isL) andi(tmp, tmp, 0xE); -+ add(haystack, haystack, tmp); -+ ld(ch2, Address(haystack)); -+ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); -+ add(result, result, tmp); -+} ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurence of pattern in source or return -1. + -+// string indexof -+// Find pattern element in src, compute match mask, -+// only the first occurrence of 0x80/0x8000 at low bits is the valid match index -+// match mask patterns would be like: -+// - 0x8080808080808080 (Latin1) -+// - 0x8000800080008000 (UTF16) -+void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, -+ Register mask1, Register mask2) -+{ -+ xorr(src, pattern, src); -+ sub(match_mask, src, mask1); -+ orr(src, src, mask2); -+ notr(src, src); -+ andr(match_mask, match_mask, src); -+} ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. + -+// add two unsigned input and output carry -+void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ assert_different_registers(dst, src2); -+ add(dst, src1, src2); -+ sltu(carry, dst, src2); -+} ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); + -+// add two input with carry -+void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, carry); -+ add(dst, src1, src2); -+ add(dst, dst, carry); -+} ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore alogorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes and algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string. ++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. 
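++  //
++  // Illustration only (not part of the generated code): with the Latin1
++  // pattern "NEEDLE" (m = 6), the preprocessing loop below yields
++  // bc['L'] = 1, bc['D'] = 2, bc['E'] = 3, bc['N'] = 5 and bc[c] = 6 for
++  // every character that does not occur in the pattern, so when the source
++  // character aligned with the last pattern position is, say, 'X', the
++  // pattern can be shifted forward by bc['X'] = 6 positions in one step
++  // instead of one position at a time.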
++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } + -+// add two unsigned input with carry and output carry -+void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) -+{ -+ assert_different_registers(dst, src2); -+ adc(dst, src1, src2, carry); -+ sltu(carry, dst, src2); -+} ++ // temp register:t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; + -+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry) -+{ -+ cad(dest_lo, dest_lo, src1, carry); -+ add(dest_hi, dest_hi, carry); -+ cad(dest_lo, dest_lo, src2, carry); -+ add(final_dest_hi, dest_hi, carry); -+} ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; + -+// Code for BigInteger::mulAdd instrinsic -+// out = x10 -+// in = x11 -+// offset = x12 (already out.length-offset) -+// len = x13 -+// k = x14 -+void MacroAssembler::mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2) { -+ Label L_loop_1, L_loop_2, L_end, L_not_zero; -+ bnez(len, L_not_zero); -+ mv(out, zr); -+ j(L_end); -+ bind(L_not_zero); -+ zero_extend(k, k, 32); -+ shadd(offset, offset, out, t0, LogBytesPerInt); -+ shadd(in, len, in, t0, LogBytesPerInt); -+ mv(out, zr); ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ? 7 : 3; + -+ if (AvoidUnalignedAccesses) { -+ // if in and offset are both 8 bytes aligned. 
-+ orr(t0, in, offset); -+ andi(t0, t0, 0x7); -+ beqz(t0, L_loop_2); -+ } else { -+ j(L_loop_2); -+ } ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) + -+ bind(L_loop_1); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); -+ sub(len, len, 1); -+ beqz(len, L_end); -+ j(L_loop_1); -+ -+ -+ bind(L_loop_2); -+ Label L_one; -+ sub(len, len, 1); -+ bltz(len, L_end); -+ sub(len, len, 1); -+ bltz(len, L_one); -+ -+ sub(in, in, 8); -+ ld(tmp1, Address(in, 0)); -+ ror_imm(tmp1, tmp1, 32); // convert to little-endian -+ -+ const Register carry = out; -+ const Register src1_hi = t0; -+ const Register src1_lo = tmp2; -+ const Register src2 = t1; -+ -+ mulhu(src1_hi, k, tmp1); -+ mul(src1_lo, k, tmp1); -+ sub(offset, offset, 8); -+ ld(src2, Address(offset, 0)); -+ ror_imm(src2, src2, 32, tmp1); -+ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); -+ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian -+ sd(src1_lo, Address(offset, 0)); -+ j(L_loop_2); -+ -+ bind(L_one); -+ sub(in, in, 4); -+ lwu(t0, Address(in, 0)); -+ mul(t1, t0, k); -+ add(t0, t1, out); -+ sub(offset, offset, 4); -+ lwu(t1, Address(offset, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(offset)); -+ srli(out, t0, 32); ++ sub(sp, sp, ASIZE); + -+ bind(L_end); -+} ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] + -+/** -+ * Multiply 32 bit by 32 bit first loop. 
-+ */ -+void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // long carry = 0; -+ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { -+ // long product = (y[j] & LONG_MASK) * -+ // (x[xstart] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[xstart] = (int)carry; ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations + -+ Label L_first_loop, L_first_loop_exit; ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(x_xstart, Address(t0, 0)); ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(y_idx, Address(t0, 0)); -+ mul(product, x_xstart, y_idx); -+ add(product, product, carry); -+ srli(carry, product, 32); -+ sub(kdx, kdx, 1); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(product, Address(t0, 0)); -+ j(L_first_loop); ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); + -+ bind(L_first_loop_exit); -+} ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } + -+/** -+ * Multiply 64 bit by 64 bit first loop. 
-+ */ -+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, -+ Register y, Register y_idx, Register z, -+ Register carry, Register product, -+ Register idx, Register kdx) -+{ -+ // -+ // jlong carry, x[], y[], z[]; -+ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { -+ // huge_128 product = y[idx] * x[xstart] + carry; -+ // z[kdx] = (jlong)product; -+ // carry = (jlong)(product >>> 64); -+ // } -+ // z[xstart] = carry; -+ // ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } + -+ Label L_first_loop, L_first_loop_exit; -+ Label L_one_x, L_one_y, L_multiply; ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); + -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_one_x); ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(x_xstart, Address(t0, 0)); -+ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); + -+ bind(L_first_loop); -+ sub(idx, idx, 1); -+ bltz(idx, L_first_loop_exit); -+ sub(idx, idx, 1); -+ bltz(idx, L_one_y); ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(y_idx, Address(t0, 0)); -+ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian -+ bind(L_multiply); ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); + -+ mulhu(t0, x_xstart, y_idx); -+ mul(product, x_xstart, y_idx); -+ cad(product, product, carry, t1); -+ adc(carry, t0, zr, t1); ++ bind(BMLOOPSTR1_LASTCMP); ++ 
bne(ch1, ch2, BMSKIP); + -+ sub(kdx, kdx, 2); -+ ror_imm(product, product, 32); // back to big-endian -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sd(product, Address(t0, 0)); ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); + -+ j(L_first_loop); ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); + -+ bind(L_one_y); -+ lwu(y_idx, Address(y, 0)); -+ j(L_multiply); ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); + -+ bind(L_one_x); -+ lwu(x_xstart, Address(x, 0)); -+ j(L_first_loop); ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); + -+ bind(L_first_loop_exit); ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); +} + -+/** -+ * Multiply 128 bit by 128. Unrolled inner loop. -+ * -+ */ -+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, -+ Register carry, Register carry2, -+ Register idx, Register jdx, -+ Register yz_idx1, Register yz_idx2, -+ Register tmp, Register tmp3, Register tmp4, -+ Register tmp6, Register product_hi) ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) +{ -+ // jlong carry, x[], y[], z[]; -+ // int kdx = xstart+1; -+ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop -+ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; -+ // jlong carry2 = (jlong)(tmp3 >>> 64); -+ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; -+ // carry = (jlong)(tmp4 >>> 64); -+ // z[kdx+idx+1] = (jlong)tmp3; -+ // z[kdx+idx] = (jlong)tmp4; -+ // } -+ // idx += 2; -+ // if (idx > 0) { -+ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; -+ // z[kdx+idx] = (jlong)yz_idx1; -+ // carry = (jlong)(yz_idx1 >>> 64); -+ // } -+ // -+ -+ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; -+ -+ srli(jdx, idx, 2); ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); + -+ bind(L_third_loop); ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; + -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); -+ sub(idx, idx, 4); ++ bool isLL = ae == StrIntrinsicNode::LL; + -+ 
shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ld(yz_idx1, Address(t0, wordSize)); ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; + -+ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; + -+ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian -+ ror_imm(yz_idx2, yz_idx2, 32); ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; + -+ ld(t1, Address(tmp6, 0)); -+ ld(t0, Address(tmp6, wordSize)); ++ Register first = tmp3; + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; + -+ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian -+ ror_imm(t1, t1, 32, tmp); ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); + -+ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp -+ mulhu(carry2, product_hi, yz_idx2); ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); + -+ cad(tmp3, tmp3, carry, carry); -+ adc(tmp4, tmp4, zr, carry); -+ cad(tmp3, tmp3, t0, t0); -+ cadc(tmp4, tmp4, tmp, t0); -+ adc(carry, carry2, zr, t0); -+ cad(tmp4, tmp4, t1, carry2); -+ adc(carry, carry, zr, carry2); ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian -+ ror_imm(tmp4, tmp4, 32); -+ sd(tmp4, Address(tmp6, 0)); -+ sd(tmp3, Address(tmp6, wordSize)); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ j(L_third_loop); ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); + -+ bind(L_third_loop_exit); ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); + -+ andi(idx, idx, 0x3); -+ beqz(idx, L_post_third_loop_done); ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } + -+ Label L_check_1; -+ sub(idx, idx, 2); -+ bltz(idx, L_check_1); ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ 
sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ ld(yz_idx1, Address(t0, 0)); -+ ror_imm(yz_idx1, yz_idx1, 32); ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } + -+ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 -+ mulhu(tmp4, product_hi, yz_idx1); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ ld(yz_idx2, Address(t0, 0)); -+ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } + -+ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); + -+ ror_imm(tmp3, tmp3, 32, tmp); -+ sd(tmp3, Address(t0, 0)); ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); + -+ bind(L_check_1); ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); + -+ andi(idx, idx, 0x1); -+ sub(idx, idx, 1); -+ bltz(idx, L_post_third_loop_done); -+ shadd(t0, idx, y, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); -+ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 -+ mulhu(carry2, tmp4, product_hi); ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ lwu(tmp4, Address(t0, 0)); ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } + -+ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; + -+ shadd(t0, idx, z, t0, LogBytesPerInt); -+ sw(tmp3, Address(t0, 0)); -+ slli(t0, carry2, 32); -+ srli(carry, tmp3, 32); -+ orr(carry, carry, t0); ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); + -+ bind(L_post_third_loop_done); ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ 
(this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); +} + -+/** -+ * Code for BigInteger::multiplyToLen() instrinsic. -+ * -+ * x10: x -+ * x11: xlen -+ * x12: y -+ * x13: ylen -+ * x14: z -+ * x15: zlen -+ * x16: tmp1 -+ * x17: tmp2 -+ * x7: tmp3 -+ * x28: tmp4 -+ * x29: tmp5 -+ * x30: tmp6 -+ * x31: tmp7 -+ */ -+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, -+ Register z, Register zlen, -+ Register tmp1, Register tmp2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register product_hi) ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) +{ -+ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); -+ -+ const Register idx = tmp1; -+ const Register kdx = tmp2; -+ const Register xstart = tmp3; ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; + -+ const Register y_idx = tmp4; -+ const Register carry = tmp5; -+ const Register product = xlen; -+ const Register x_xstart = zlen; // reuse register ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; + -+ mv(idx, ylen); // idx = ylen; -+ mv(kdx, zlen); // kdx = xlen+ylen; -+ mv(carry, zr); // carry = 0; ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; + -+ Label L_multiply_64_or_128, L_done; ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; + -+ sub(xstart, xlen, 1); -+ bltz(xstart, L_done); ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; + -+ const Register jdx = tmp1; ++ BLOCK_COMMENT("string_compare {"); + -+ if (AvoidUnalignedAccesses) { -+ // if x and y are both 8 bytes aligend. -+ orr(t0, xlen, ylen); -+ andi(t0, t0, 0x1); -+ beqz(t0, L_multiply_64_or_128); -+ } else { -+ j(L_multiply_64_or_128); ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. ++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); + } + -+ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ // Compute the minimum of the string lengths and save the difference in result. 
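++  // As an illustration (not generated code): comparing "abcdef" with "abcq"
++  // stops at the first differing characters and returns 'd' - 'q', while
++  // comparing "abc" with "abcdef" finds the common prefix equal and the
++  // saved length difference 3 - 6 = -3 becomes the result.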
++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); + -+ Label L_second_loop_1; -+ bind(L_second_loop_1); -+ mv(carry, zr); -+ mv(jdx, ylen); -+ sub(xstart, xstart, 1); -+ bltz(xstart, L_done); -+ sub(sp, sp, 2 * wordSize); -+ sd(z, Address(sp, 0)); -+ sd(zr, Address(sp, wordSize)); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ lwu(product, Address(t0, 0)); -+ Label L_third_loop, L_third_loop_exit; ++ // A very short string ++ li(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); + -+ bind(L_third_loop); -+ sub(jdx, jdx, 1); -+ bltz(jdx, L_third_loop_exit); ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ beq(str1, str2, DONE); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if (!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ li(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); + -+ shadd(t0, jdx, y, t0, LogBytesPerInt); -+ lwu(t0, Address(t0, 0)); -+ mul(t1, t0, product); -+ add(t0, t1, carry); -+ shadd(tmp6, jdx, z, t1, LogBytesPerInt); -+ lwu(t1, Address(tmp6, 0)); -+ add(t0, t0, t1); -+ sw(t0, Address(tmp6, 0)); -+ srli(carry, t0, 32); -+ j(L_third_loop); ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); + -+ bind(L_third_loop_exit); -+ ld(z, Address(sp, 0)); -+ addi(sp, sp, 2 * wordSize); -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. 
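++    // Illustration (not generated code): for an LL comparison of 12-byte
++    // strings, the first 8-byte load covers bytes [0, 8) and the final load
++    // below, anchored at the string end, covers bytes [4, 12); bytes [4, 8)
++    // are simply compared a second time, which is harmless because they are
++    // already known to be equal.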
++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ lwu(tmp2, Address(str2)); ++ ld(tmp1, Address(str1)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); + -+ j(L_second_loop_1); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } + -+ bind(L_multiply_64_or_128); -+ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); + -+ Label L_second_loop_2; -+ beqz(kdx, L_second_loop_2); ++ bind(SHORT_STRING); ++ // Is the minimum length zero? ++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); + -+ Label L_carry; -+ sub(kdx, kdx, 1); -+ beqz(kdx, L_carry); ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); + -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ srli(carry, carry, 32); -+ sub(kdx, kdx, 1); ++ bind(DONE); + -+ bind(L_carry); -+ shadd(t0, kdx, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ BLOCK_COMMENT("} string_compare"); ++} + -+ // Second and third (nested) loops. 
-+ // -+ // for (int i = xstart-1; i >= 0; i--) { // Second loop -+ // carry = 0; -+ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop -+ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + -+ // (z[k] & LONG_MASK) + carry; -+ // z[k] = (int)product; -+ // carry = product >>> 32; -+ // } -+ // z[i] = (int)carry; -+ // } -+ // -+ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); + -+ bind(L_second_loop_2); -+ mv(carry, zr); // carry = 0; -+ mv(jdx, ylen); // j = ystart+1 ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ li(elem_per_word, wordSize / elem_size); + -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_done); ++ BLOCK_COMMENT("arrays_equals {"); + -+ sub(sp, sp, 4 * wordSize); -+ sd(z, Address(sp, 0)); ++ // if (a1 == a2), return true ++ beq(a1, a2, SAME); + -+ Label L_last_x; -+ shadd(t0, xstart, z, t0, LogBytesPerInt); -+ addi(z, t0, 4); -+ sub(xstart, xstart, 1); // i = xstart-1; -+ bltz(xstart, L_last_x); ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); + -+ shadd(t0, xstart, x, t0, LogBytesPerInt); -+ ld(product_hi, Address(t0, 0)); -+ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same + -+ Label L_third_loop_prologue; -+ bind(L_third_loop_prologue); ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); + -+ sd(ylen, Address(sp, wordSize)); -+ sd(x, Address(sp, 2 * wordSize)); -+ sd(xstart, Address(sp, 3 * wordSize)); -+ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, -+ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); -+ ld(z, Address(sp, 0)); -+ ld(ylen, Address(sp, wordSize)); -+ ld(x, Address(sp, 2 * wordSize)); -+ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen -+ addi(sp, sp, 4 * wordSize); ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); + -+ addi(tmp3, xlen, 1); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); + -+ sub(tmp3, tmp3, 1); -+ bltz(tmp3, L_done); 
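The sll by tmp5 in the TAIL path above relies on tmp5 having been initialized to minus the array length in bits, and on RV64 using only the low six bits of a register shift amount: the shift discards exactly the bytes that lie beyond the end of the data before the zero test. A small sketch of the trick (illustrative, not the emitted code):

    #include <cstdint>

    // valid_bytes is in 1..8; bytes above it in the loaded words are garbage.
    static bool tail_equal(uint64_t w1, uint64_t w2, unsigned valid_bytes) {
      uint64_t diff = w1 ^ w2;
      unsigned shamt = (0u - valid_bytes * 8u) & 63u;  // == (64 - valid_bits) mod 64
      return (diff << shamt) == 0;                     // out-of-range bytes shifted out
    }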
++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); + -+ // z[i] = (int) carry; -+ srli(carry, carry, 32); -+ shadd(t0, tmp3, z, t0, LogBytesPerInt); -+ sw(carry, Address(t0, 0)); -+ j(L_second_loop_2); ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); + -+ // Next infrequent code is moved outside loops. -+ bind(L_last_x); -+ lwu(product_hi, Address(x, 0)); -+ j(L_third_loop_prologue); ++ bind(SAME); ++ mv(result, true); ++ // That's it. ++ bind(DONE); + -+ bind(L_done); ++ BLOCK_COMMENT("} array_equals"); +} -+#endif // COMPILER2 + -+// Count bits of trailing zero chars from lsb to msb until first non-zero element. -+// For LL case, one byte for one element, so shift 8 bits once, and for other case, -+// shift 16 bits once. -+void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) -+{ -+ if (UseZbb) { -+ assert_different_registers(Rd, Rs, tmp1); -+ int step = isLL ? 8 : 16; -+ ctz(Rd, Rs); -+ andi(tmp1, Rd, step - 1); -+ sub(Rd, Rd, tmp1); -+ return; -+ } -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ Label Loop; -+ int step = isLL ? 8 : 16; -+ mv(Rd, -step); -+ mv(tmp2, Rs); ++// Compare Strings + -+ bind(Loop); -+ addi(Rd, Rd, step); -+ andi(tmp1, tmp2, ((1 << step) - 1)); -+ srli(tmp2, tmp2, step); -+ beqz(tmp1, Loop); -+} ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. + -+// This instruction reads adjacent 4 bytes from the lower half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A300A200A100A0 -+void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) +{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ mv(tmp1, 0xFF000000); // first byte mask at lower word -+ andr(Rd, Rs, tmp1); -+ for (int i = 0; i < 2; i++) { -+ slli(Rd, Rd, wordSize); -+ srli(tmp1, tmp1, wordSize); -+ andr(tmp2, Rs, tmp1); -+ orr(Rd, Rd, tmp2); -+ } -+ slli(Rd, Rd, wordSize); -+ andi(tmp2, Rs, 0xFF); // last byte mask at lower word -+ orr(Rd, Rd, tmp2); -+} ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; + -+// This instruction reads adjacent 4 bytes from the upper half of source register, -+// inflate into a register, for example: -+// Rs: A7A6A5A4A3A2A1A0 -+// Rd: 00A700A600A500A4 -+void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) -+{ -+ assert_different_registers(Rd, Rs, tmp1, tmp2); -+ srli(Rs, Rs, 32); // only upper 32 bits are needed -+ inflate_lo32(Rd, Rs, tmp1, tmp2); -+} ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); + -+// The size of the blocks erased by the zero_blocks stub. We must -+// handle anything smaller than this ourselves in zero_words(). -+const int MacroAssembler::zero_words_block_size = 8; ++ BLOCK_COMMENT("string_equals {"); + -+// zero_words() is used by C2 ClearArray patterns. It is as small as -+// possible, handling small word counts locally and delegating -+// anything larger to the zero_blocks stub. 
It is expanded many times -+// in compiled code, so it is important to keep it short. ++ mv(result, false); + -+// ptr: Address of a buffer to be zeroed. -+// cnt: Count in HeapWords. -+// -+// ptr, cnt, and t0 are clobbered. -+address MacroAssembler::zero_words(Register ptr, Register cnt) -+{ -+ assert(is_power_of_2(zero_words_block_size), "adjust this"); -+ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); -+ assert_different_registers(cnt, t0); ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ bltz(cnt1, SHORT); + -+ BLOCK_COMMENT("zero_words {"); -+ mv(t0, zero_words_block_size); -+ Label around, done, done16; -+ bltu(cnt, t0, around); ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ ++ bind(SHORT); ++ Label TAIL03, TAIL01; ++ ++ // 0-7 bytes left. ++ andi(t0, cnt1, 4); ++ beqz(t0, TAIL03); + { -+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); -+ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); -+ if (StubRoutines::riscv::complete()) { -+ address tpc = trampoline_call(zero_blocks); -+ if (tpc == NULL) { -+ DEBUG_ONLY(reset_labels1(around)); -+ postcond(pc() == badAddress); -+ return NULL; -+ } -+ } else { -+ jal(zero_blocks); -+ } -+ } -+ bind(around); -+ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { -+ Label l; -+ andi(t0, cnt, i); -+ beqz(t0, l); -+ for (int j = 0; j < i; j++) { -+ sd(zr, Address(ptr, 0)); -+ addi(ptr, ptr, 8); -+ } -+ bind(l); ++ lwu(tmp1, Address(a1, 0)); ++ add(a1, a1, 4); ++ lwu(tmp2, Address(a2, 0)); ++ add(a2, a2, 4); ++ bne(tmp1, tmp2, DONE); + } ++ ++ bind(TAIL03); ++ // 0-3 bytes left. ++ andi(t0, cnt1, 2); ++ beqz(t0, TAIL01); + { -+ Label l; -+ andi(t0, cnt, 1); -+ beqz(t0, l); -+ sd(zr, Address(ptr, 0)); -+ bind(l); ++ lhu(tmp1, Address(a1, 0)); ++ add(a1, a1, 2); ++ lhu(tmp2, Address(a2, 0)); ++ add(a2, a2, 2); ++ bne(tmp1, tmp2, DONE); + } -+ BLOCK_COMMENT("} zero_words"); -+ postcond(pc() != badAddress); -+ return pc(); -+} -+ -+// base: Address of a buffer to be zeroed, 8 bytes aligned. -+// cnt: Immediate count in HeapWords. 
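The SHORT path here (continued by TAIL01 just below) never loops: the residual byte count is decomposed into its 4/2/1 bits, so at most three fixed-size compares remain. A scalar C++ sketch of the same idea, assuming little-endian loads and illustrative only:

    #include <cstdint>
    #include <cstring>

    static bool short_equal(const uint8_t* a, const uint8_t* b, unsigned rem /* 0..7 */) {
      if (rem & 4) {
        uint32_t x, y;
        std::memcpy(&x, a, 4); std::memcpy(&y, b, 4);
        if (x != y) return false;
        a += 4; b += 4;
      }
      if (rem & 2) {
        uint16_t x, y;
        std::memcpy(&x, a, 2); std::memcpy(&y, b, 2);
        if (x != y) return false;
        a += 2; b += 2;
      }
      if (rem & 1) {
        if (*a != *b) return false;
      }
      return true;
    }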
-+#define SmallArraySize (18 * BytesPerLong) -+void MacroAssembler::zero_words(Register base, uint64_t cnt) -+{ -+ assert_different_registers(base, t0, t1); -+ -+ BLOCK_COMMENT("zero_words {"); -+ -+ if (cnt <= SmallArraySize / BytesPerLong) { -+ for (int i = 0; i < (int)cnt; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } -+ } else { -+ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll -+ int remainder = cnt % unroll; -+ for (int i = 0; i < remainder; i++) { -+ sd(zr, Address(base, i * wordSize)); -+ } + -+ Label loop; -+ Register cnt_reg = t0; -+ Register loop_base = t1; -+ cnt = cnt - remainder; -+ mv(cnt_reg, cnt); -+ add(loop_base, base, remainder * wordSize); -+ bind(loop); -+ sub(cnt_reg, cnt_reg, unroll); -+ for (int i = 0; i < unroll; i++) { -+ sd(zr, Address(loop_base, i * wordSize)); ++ bind(TAIL01); ++ if (elem_size == 1) { // Only needed when comparing 1-byte elements ++ // 0-1 bytes left. ++ andi(t0, cnt1, 1); ++ beqz(t0, SAME); ++ { ++ lbu(tmp1, a1, 0); ++ lbu(tmp2, a2, 0); ++ bne(tmp1, tmp2, DONE); + } -+ add(loop_base, loop_base, unroll * wordSize); -+ bnez(cnt_reg, loop); + } -+ BLOCK_COMMENT("} zero_words"); ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); +} + -+// base: Address of a buffer to be filled, 8 bytes aligned. -+// cnt: Count in 8-byte unit. -+// value: Value to be filled with. -+// base will point to the end of the buffer after filling. -+void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = +{ -+// Algorithm: -+// -+// t0 = cnt & 7 -+// cnt -= t0 -+// p += t0 -+// switch (t0): -+// switch start: -+// do while cnt -+// cnt -= 8 -+// p[-8] = value -+// case 7: -+// p[-7] = value -+// case 6: -+// p[-6] = value -+// // ... 
-+// case 1: -+// p[-1] = value -+// case 0: -+// p += 8 -+// do-while end -+// switch end ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, + -+ assert_different_registers(base, cnt, value, t0, t1); ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; + -+ Label fini, skip, entry, loop; -+ const int unroll = 8; // Number of sd instructions we'll unroll ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, + -+ beqz(cnt, fini); ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; + -+ andi(t0, cnt, unroll - 1); -+ sub(cnt, cnt, t0); -+ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. -+ shadd(base, t0, base, t1, 3); -+ la(t1, entry); -+ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) -+ sub(t1, t1, t0); -+ jr(t1); ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} + -+ bind(loop); -+ add(base, base, unroll * 8); -+ for (int i = -unroll; i < 0; i++) { -+ sd(value, Address(base, i * 8)); ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); + } -+ bind(entry); -+ sub(cnt, cnt, unroll); -+ bgez(cnt, loop); ++} + -+ bind(fini); ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } +} + -+#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ -+void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ -+ Label L_Okay; \ -+ fscsr(zr); \ -+ FLOATCVT(dst, src); \ -+ frcsr(tmp); \ -+ andi(tmp, tmp, 0x1E); \ -+ beqz(tmp, L_Okay); \ -+ FLOATEQ(tmp, src, src); \ -+ bnez(tmp, L_Okay); \ -+ mv(dst, zr); \ -+ bind(L_Okay); \ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); +} + -+FCVT_SAFE(fcvt_w_s, feq_s) -+FCVT_SAFE(fcvt_l_s, feq_s) -+FCVT_SAFE(fcvt_w_d, feq_d) -+FCVT_SAFE(fcvt_l_d, feq_d) ++// Set dst to NaN if any NaN input. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); + -+#undef FCVT_SAFE ++ Label Done; ++ fsflags(zr); ++ if (is_double) { ++ is_min ? fmin_d(dst, src1, src2) ++ : fmax_d(dst, src1, src2); ++ // Checking NaNs ++ flt_d(zr, src1, src2); ++ } else { ++ is_min ? fmin_s(dst, src1, src2) ++ : fmax_s(dst, src1, src2); ++ // Checking NaNs ++ flt_s(zr, src1, src2); ++ } + -+#define FCMP(FLOATTYPE, FLOATSIG) \ -+void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ -+ FloatRegister Rs2, int unordered_result) { \ -+ Label Ldone; \ -+ if (unordered_result < 0) { \ -+ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ -+ /* installs 1 if gt else 0 */ \ -+ flt_##FLOATSIG(result, Rs2, Rs1); \ -+ /* Rs1 > Rs2, install 1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 < Rs2, install -1 */ \ -+ bind(Ldone); \ -+ } else { \ -+ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ -+ /* installs 1 if gt or unordered else 0 */ \ -+ flt_##FLOATSIG(result, Rs1, Rs2); \ -+ /* Rs1 < Rs2, install -1 */ \ -+ bgtz(result, Ldone); \ -+ feq_##FLOATSIG(result, Rs1, Rs2); \ -+ addi(result, result, -1); \ -+ /* Rs1 = Rs2, install 0 */ \ -+ /* NaN or Rs1 > Rs2, install 1 */ \ -+ bind(Ldone); \ -+ neg(result, result); \ -+ } \ -+} ++ frflags(t0); ++ beqz(t0, Done); + -+FCMP(float, s); -+FCMP(double, d); ++ // In case of NaNs ++ is_double ? 
fadd_d(dst, src1, src2) ++ : fadd_s(dst, src1, src2); + -+#undef FCMP ++ bind(Done); ++} + -+// Zero words; len is in bytes -+// Destroys all registers except addr -+// len must be a nonzero multiple of wordSize -+void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { -+ assert_different_registers(addr, len, tmp1, t0, t1); ++#endif // COMPILER2 + -+#ifdef ASSERT -+ { -+ Label L; -+ andi(t0, len, BytesPerWord - 1); -+ beqz(t0, L); -+ stop("len is not a multiple of BytesPerWord"); -+ bind(L); -+ } -+#endif // ASSERT +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 0000000000..c660bce437 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,966 @@ ++/* ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+#ifndef PRODUCT -+ block_comment("zero memory"); -+#endif // PRODUCT ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP + -+ Label loop; -+ Label entry; ++#include "asm/assembler.hpp" ++#include "metaprogramming/enableIf.hpp" + -+ // Algorithm: -+ // -+ // t0 = cnt & 7 -+ // cnt -= t0 -+ // p += t0 -+ // switch (t0) { -+ // do { -+ // cnt -= 8 -+ // p[-8] = 0 -+ // case 7: -+ // p[-7] = 0 -+ // case 6: -+ // p[-6] = 0 -+ // ... -+ // case 1: -+ // p[-1] = 0 -+ // case 0: -+ // p += 8 -+ // } while (cnt) -+ // } ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. 
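On the minmax_FD routine that closes the .cpp changes above: RISC-V fmin/fmax return the non-NaN operand, while Java's Math.min/max must return NaN whenever either input is NaN, hence the fflags check after a dummy flt comparison and the fadd fallback. A hedged scalar sketch of the required semantics (not the stub itself):

    #include <cmath>

    static double java_math_min(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return a + b;              // any NaN input propagates, like the fadd_d fallback
      }
      return std::fmin(a, b);      // otherwise the hardware min/max result is correct
    }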
+ -+ const int unroll = 8; // Number of sd(zr) instructions we'll unroll -+ -+ srli(len, len, LogBytesPerWord); -+ andi(t0, len, unroll - 1); // t0 = cnt % unroll -+ sub(len, len, t0); // cnt -= unroll -+ // tmp1 always points to the end of the region we're about to zero -+ shadd(tmp1, t0, addr, t1, LogBytesPerWord); -+ la(t1, entry); -+ slli(t0, t0, 2); -+ sub(t1, t1, t0); -+ jr(t1); -+ bind(loop); -+ sub(len, len, unroll); -+ for (int i = -unroll; i < 0; i++) { -+ Assembler::sd(zr, Address(tmp1, i * wordSize)); -+ } -+ bind(entry); -+ add(tmp1, tmp1, unroll * wordSize); -+ bnez(len, loop); -+} -+ -+// shift left by shamt and add -+// Rd = (Rs1 << shamt) + Rs2 -+void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { -+ if (UseZba) { -+ if (shamt == 1) { -+ sh1add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 2) { -+ sh2add(Rd, Rs1, Rs2); -+ return; -+ } else if (shamt == 3) { -+ sh3add(Rd, Rs1, Rs2); -+ return; -+ } -+ } -+ -+ if (shamt != 0) { -+ slli(tmp, Rs1, shamt); -+ add(Rd, Rs2, tmp); -+ } else { -+ add(Rd, Rs1, Rs2); -+ } -+} -+ -+void MacroAssembler::zero_extend(Register dst, Register src, int bits) { -+ if (UseZba && bits == 32) { -+ zext_w(dst, src); -+ return; -+ } -+ -+ if (UseZbb && bits == 16) { -+ zext_h(dst, src); -+ return; -+ } -+ -+ if (bits == 8) { -+ zext_b(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srli(dst, dst, XLEN - bits); -+ } -+} -+ -+void MacroAssembler::sign_extend(Register dst, Register src, int bits) { -+ if (UseZbb) { -+ if (bits == 8) { -+ sext_b(dst, src); -+ return; -+ } else if (bits == 16) { -+ sext_h(dst, src); -+ return; -+ } -+ } -+ -+ if (bits == 32) { -+ sext_w(dst, src); -+ } else { -+ slli(dst, src, XLEN - bits); -+ srai(dst, dst, XLEN - bits); -+ } -+} -+ -+void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) -+{ -+ if (src1 == src2) { -+ mv(dst, zr); -+ return; -+ } -+ Label done; -+ Register left = src1; -+ Register right = src2; -+ if (dst == src1) { -+ assert_different_registers(dst, src2, tmp); -+ mv(tmp, src1); -+ left = tmp; -+ } else if (dst == src2) { -+ assert_different_registers(dst, src1, tmp); -+ mv(tmp, src2); -+ right = tmp; -+ } -+ -+ // installs 1 if gt else 0 -+ slt(dst, right, left); -+ bnez(dst, done); -+ slt(dst, left, right); -+ // dst = -1 if lt; else if eq , dst = 0 -+ neg(dst, dst); -+ bind(done); -+} -+ -+void MacroAssembler::load_constant_pool_cache(Register cpool, Register method) -+{ -+ ld(cpool, Address(method, Method::const_offset())); -+ ld(cpool, Address(cpool, ConstMethod::constants_offset())); -+ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); -+} -+ -+void MacroAssembler::load_max_stack(Register dst, Register method) -+{ -+ ld(dst, Address(xmethod, Method::const_offset())); -+ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); -+} -+ -+// The java_calling_convention describes stack locations as ideal slots on -+// a frame with no abi restrictions. Since we must observe abi restrictions -+// (like the placement of the register window) the slots must be biased by -+// the following value. 
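The unrolled zero/fill loops on their way out of this hunk (zero_memory, fill_words) share one pattern: the remainder cnt % 8 is handled by computing a jump into the middle of an unrolled block of stores (the la/slli/sub/jr sequence), so no separate tail loop is needed. Roughly the following C++, with hypothetical names, captures the control flow:

    #include <cstdint>
    #include <cstddef>

    static void zero_words_sketch(uint64_t* p, size_t cnt) {
      size_t rem = cnt & 7;          // words not covered by the unrolled loop
      switch (rem) {                 // the jr(t1) lands on one of these cases
        case 7: p[6] = 0;            // fall through
        case 6: p[5] = 0;            // fall through
        case 5: p[4] = 0;            // fall through
        case 4: p[3] = 0;            // fall through
        case 3: p[2] = 0;            // fall through
        case 2: p[1] = 0;            // fall through
        case 1: p[0] = 0;            // fall through
        default: break;
      }
      p += rem;
      for (size_t done = rem; done < cnt; done += 8) {  // 8 stores per iteration
        for (int j = 0; j < 8; j++) {
          p[j] = 0;
        }
        p += 8;
      }
    }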
-+static int reg2offset_in(VMReg r) { -+ // Account for saved fp and ra -+ // This should really be in_preserve_stack_slots -+ return r->reg2stack() * VMRegImpl::stack_slot_size; -+} -+ -+static int reg2offset_out(VMReg r) { -+ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -+} -+ -+// On 64 bit we will store integer like items to the stack as -+// 64 bits items (riscv64 abi) even though java would only store -+// 32bits for a parameter. On 32bit it will simply be 32 bits -+// So this routine will do 32->32 on 32bit and 32->64 on 64bit -+void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ // 32bits extend sign -+ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); -+ } -+ } -+} -+ -+// An oop arg. Must pass a handle not the oop itself -+void MacroAssembler::object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset) { -+ assert_cond(map != NULL && receiver_offset != NULL); -+ // must pass a handle. First figure out the location we use as a handle -+ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); -+ -+ // See if oop is NULL if it is we need no handle -+ -+ if (src.first()->is_stack()) { -+ // Oop is already on the stack as an argument -+ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); -+ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); -+ if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; -+ } -+ -+ ld(t0, Address(fp, reg2offset_in(src.first()))); -+ la(rHandle, Address(fp, reg2offset_in(src.first()))); -+ // conditionally move a NULL -+ Label notZero1; -+ bnez(t0, notZero1); -+ mv(rHandle, zr); -+ bind(notZero1); -+ } else { -+ -+ // Oop is in a register we must store it to the space we reserve -+ // on the stack for oop_handles and pass a handle if oop is non-NULL -+ -+ const Register rOop = src.first()->as_Register(); -+ int oop_slot = -1; -+ if (rOop == j_rarg0) { -+ oop_slot = 0; -+ } else if (rOop == j_rarg1) { -+ oop_slot = 1; -+ } else if (rOop == j_rarg2) { -+ oop_slot = 2; -+ } else if (rOop == j_rarg3) { -+ oop_slot = 3; -+ } else if (rOop == j_rarg4) { -+ oop_slot = 4; -+ } else if (rOop == j_rarg5) { -+ oop_slot = 5; -+ } else if (rOop == j_rarg6) { -+ oop_slot = 6; -+ } else { -+ assert(rOop == j_rarg7, "wrong register"); -+ oop_slot = 7; -+ } -+ -+ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; -+ int offset = oop_slot * VMRegImpl::stack_slot_size; -+ -+ map->set_oop(VMRegImpl::stack2reg(oop_slot)); -+ // Store oop in handle area, may be NULL -+ sd(rOop, Address(sp, offset)); -+ if (is_receiver) { -+ *receiver_offset = offset; -+ } -+ -+ //rOop maybe the same as rHandle -+ if (rOop == rHandle) { -+ Label isZero; -+ beqz(rOop, isZero); -+ la(rHandle, Address(sp, offset)); -+ bind(isZero); -+ } else { -+ 
Label notZero2; -+ la(rHandle, Address(sp, offset)); -+ bnez(rOop, notZero2); -+ mv(rHandle, zr); -+ bind(notZero2); -+ } -+ } -+ -+ // If arg is on the stack then place it otherwise it is already in correct reg. -+ if (dst.first()->is_stack()) { -+ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); -+ } -+} -+ -+// A float arg may have to do float reg int reg conversion -+void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ lwu(tmp, Address(fp, reg2offset_in(src.first()))); -+ sw(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()->is_Register()) { -+ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+// A long move -+void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ // stack to stack -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ // stack to reg -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } -+ } else if (dst.first()->is_stack()) { -+ // reg to stack -+ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); -+ } else { -+ if (dst.first() != src.first()) { -+ mv(dst.first()->as_Register(), src.first()->as_Register()); -+ } -+ } -+} -+ -+// A double move -+void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { -+ assert(src.first()->is_stack() && dst.first()->is_stack() || -+ src.first()->is_reg() && dst.first()->is_reg() || -+ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); -+ if (src.first()->is_stack()) { -+ if (dst.first()->is_stack()) { -+ ld(tmp, Address(fp, reg2offset_in(src.first()))); -+ sd(tmp, Address(sp, reg2offset_out(dst.first()))); -+ } else if (dst.first()-> is_Register()) { -+ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (src.first() != dst.first()) { -+ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { -+ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+} -+ -+void MacroAssembler::rt_call(address dest, Register tmp) { -+ CodeBlob *cb = CodeCache::find_blob(dest); -+ if (cb) { -+ far_call(RuntimeAddress(dest)); -+ } else { -+ int32_t offset = 0; -+ la_patchable(tmp, RuntimeAddress(dest), offset); -+ jalr(x1, tmp, offset); -+ } -+} -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -new file mode 100644 -index 000000000..a4d5ce0e0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp -@@ -0,0 +1,975 @@ -+/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 
-+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP -+ -+#include "asm/assembler.inline.hpp" -+#include "code/vmreg.hpp" -+// MacroAssembler extends Assembler by frequently used macros. -+// -+// Instructions for which a 'better' code sequence exists depending -+// on arguments should also go in here. -+ -+class MacroAssembler: public Assembler { ++class MacroAssembler: public Assembler { + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) { @@ -24277,12 +24306,38 @@ index 000000000..a4d5ce0e0 + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done, Register flag = noreg); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. ++ void atomic_incw(Register counter_addr, Register tmp); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ + // Alignment -+ void align(int modulus); ++ void align(int modulus, int extra_offset = 0); + + // Stack frame creation/removal + // Note that SP must be updated to the right place before saving/restoring RA and FP -+ // because signal based thread suspend/resume could happend asychronously ++ // because signal based thread suspend/resume could happen asynchronously. 
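The biased_locking_enter/exit declarations above implement HotSpot's biased-locking fast path: if the object's mark word already records the current thread as bias owner, the lock is taken without any atomic operation. A deliberately simplified sketch of that idea; the tag bits, owner mask and helper name here are illustrative, not the real markWord encoding, and epoch/revocation handling is omitted:

    #include <atomic>
    #include <cstdint>

    constexpr uintptr_t kBiasedTag = 0x5;                 // illustrative tag bits
    constexpr uintptr_t kOwnerMask = ~uintptr_t(0xFF);    // illustrative owner field

    static bool try_biased_lock(std::atomic<uintptr_t>& mark, uintptr_t self_thread) {
      uintptr_t m = mark.load(std::memory_order_relaxed);
      if ((m & 0xFF) == kBiasedTag && (m & kOwnerMask) == self_thread) {
        return true;                                      // already biased to us: no CAS
      }
      uintptr_t expected = kBiasedTag;                    // anonymously biased, unowned
      uintptr_t desired  = self_thread | kBiasedTag;      // claim the bias for this thread
      return mark.compare_exchange_strong(expected, desired);
    }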
+ void enter() { + addi(sp, sp, - 2 * wordSize); + sd(ra, Address(sp, wordSize)); @@ -24415,7 +24470,7 @@ index 000000000..a4d5ce0e0 + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, + Address src, Register tmp1, Register thread_tmp); + void access_store_at(BasicType type, DecoratorSet decorators, Address dst, -+ Register src, Register tmp1, Register tmp2, Register tmp3); ++ Register src, Register tmp1, Register thread_tmp); + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); @@ -24435,7 +24490,7 @@ index 000000000..a4d5ce0e0 + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, -+ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); + + void store_klass_gap(Register dst, Register src); + @@ -24444,6 +24499,8 @@ index 000000000..a4d5ce0e0 + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + ++ void load_prototype_header(Register dst, Register src); ++ + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (linke NULL) into a Register by giving + // the compiler two choices it can't resolve @@ -24459,6 +24516,7 @@ index 000000000..a4d5ce0e0 + + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); ++ static bool uses_implicit_null_check(void* address); + + // idiv variant which deals with MINLONG as dividend and -1 as divisor + int corrected_idivl(Register result, Register rs1, Register rs2, @@ -24481,22 +24539,29 @@ index 000000000..a4d5ce0e0 + RegisterOrConstant vtable_index, + Register method_result); + ++ // Form an addres from base + offset in Rd. Rd my or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided mathces the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset); ++ + // allocation -+ void eden_allocate( ++ void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register tmp1, // temp register -+ Label& slow_case, // continuation point if fast allocation fails ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails + bool is_far = false + ); -+ void tlab_allocate( ++ ++ void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time -+ Register tmp1, // temp register -+ Register tmp2, // temp register -+ Label& slow_case, // continuation point of fast allocation fails ++ Register tmp, // temp register ++ Label& slow_case, // continuation point if fast allocation fails + bool is_far = false + ); + @@ -24516,11 +24581,11 @@ index 000000000..a4d5ce0e0 + + // The reset of the type cehck; must be wired to a corresponding fast path. 
+ // It does not repeat the fast path logic, so don't use it standalone. -+ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. ++ // The tmp1_reg and tmp2_reg can be noreg, if no temps are avaliable. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, -+ Register tmp_reg, ++ Register tmp1_reg, + Register tmp2_reg, + Label* L_success, + Label* L_failure); @@ -24580,20 +24645,14 @@ index 000000000..a4d5ce0e0 + + void unimplemented(const char* what = ""); + -+ void should_not_reach_here() { stop("should not reach here"); } -+ -+ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, -+ Register tmp, -+ int offset) { -+ return RegisterOrConstant(tmp); -+ } ++ void should_not_reach_here() { stop("should not reach here"); } + + static address target_addr_for_insn(address insn_addr); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. -+ static int pd_patch_instruction_size(address branch, address target) ; -+ void pd_patch_instruction(address branch, address target) { ++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) { + pd_patch_instruction_size(branch, target); + } + static address pd_call_destination(address branch) { @@ -24619,12 +24678,9 @@ index 000000000..a4d5ce0e0 + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + public: -+ // enum used for riscv--x86 linkage to define return type of x86 function -+ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; -+ + // Standard pseudoinstruction + void nop(); -+ void mv(Register Rd, Register Rs) ; ++ void mv(Register Rd, Register Rs); + void notr(Register Rd, Register Rs); + void neg(Register Rd, Register Rs); + void negw(Register Rd, Register Rs); @@ -24671,11 +24727,11 @@ index 000000000..a4d5ce0e0 + void fsflagsi(unsigned imm); + + void beqz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); + void blez(Register Rs, const address &dest); + void bgez(Register Rs, const address &dest); + void bltz(Register Rs, const address &dest); + void bgtz(Register Rs, const address &dest); -+ void bnez(Register Rs, const address &dest); + void la(Register Rd, Label &label); + void la(Register Rd, const address &dest); + void la(Register Rd, const Address &adr); @@ -24705,11 +24761,25 @@ index 000000000..a4d5ce0e0 + void pop_reg(Register Rd); + int push_reg(unsigned int bitset, Register stack); + int pop_reg(unsigned int bitset, Register stack); -+ static RegSet call_clobbered_registers(); -+ void push_call_clobbered_registers(); -+ void pop_call_clobbered_registers(); -+ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); -+ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except t0 and t1. (They are always ++ // temporary registers, so we don't have to protect them.) ++ // Additional registers can be excluded in a passed RegSet. 
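The fast-path/slow-path pair for the type check above amounts to: try the one-element secondary super cache, then linearly scan the secondary supers array (the repne_scan helper), and refresh the cache on a hit; the primary-supers depth check of the fast path is omitted here. A compact sketch with made-up types (KlassSketch is not the real Klass):

    #include <vector>

    struct KlassSketch {
      const KlassSketch*              secondary_super_cache = nullptr;
      std::vector<const KlassSketch*> secondary_supers;
    };

    static bool is_subtype_of(KlassSketch* sub, const KlassSketch* super) {
      if (sub == super || sub->secondary_super_cache == super) {
        return true;                                        // handled by the fast path
      }
      for (const KlassSketch* s : sub->secondary_supers) {   // slow path: linear scan
        if (s == super) {
          sub->secondary_super_cache = super;               // remember for next time
          return true;
        }
      }
      return false;
    }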
++ void push_call_clobbered_registers_except(RegSet exclude); ++ void pop_call_clobbered_registers_except(RegSet exclude); ++ ++ void push_call_clobbered_registers() { ++ push_call_clobbered_registers_except(RegSet()); ++ } ++ void pop_call_clobbered_registers() { ++ pop_call_clobbered_registers_except(RegSet()); ++ } ++ ++ void pusha(); ++ void popa(); ++ void push_CPU_state(); ++ void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); @@ -24721,8 +24791,6 @@ index 000000000..a4d5ce0e0 + } + + // mv -+ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } -+ + inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } + inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } @@ -24733,6 +24801,7 @@ index 000000000..a4d5ce0e0 + inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } + + void mv(Register Rd, Address dest); ++ void mv(Register Rd, address dest); + void mv(Register Rd, RegisterOrConstant src); + + // logic @@ -24740,26 +24809,6 @@ index 000000000..a4d5ce0e0 + void orrw(Register Rd, Register Rs1, Register Rs2); + void xorrw(Register Rd, Register Rs1, Register Rs2); + -+ // vext -+ void vmnot_m(VectorRegister vd, VectorRegister vs); -+ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); -+ void vfneg_v(VectorRegister vd, VectorRegister vs); -+ -+ // support for argument shuffling -+ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); -+ void object_move(OopMap* map, -+ int oop_handle_offset, -+ int framesize_in_slots, -+ VMRegPair src, -+ VMRegPair dst, -+ bool is_receiver, -+ int* receiver_offset); -+ -+ void rt_call(address dest, Register tmp = t0); -+ + // revb + void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend + void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend @@ -24770,14 +24819,12 @@ index 000000000..a4d5ce0e0 + void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word + void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword + -+ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++ void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); + void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); + -+ // Support for serializing memory accesses between threads -+ void serialize_memory(Register thread, Register tmp1, Register tmp2); -+ + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); -+ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); + void cmpxchg(Register addr, Register expected, + Register new_val, + enum operand_size size, @@ -24817,39 +24864,14 @@ index 000000000..a4d5ce0e0 + void atomic_xchgwu(Register prev, Register newv, Register 
addr); + void atomic_xchgalwu(Register prev, Register newv, Register addr); + -+ // Biased locking support -+ // lock_reg and obj_reg must be loaded up with the appropriate values. -+ // swap_reg is killed. -+ // tmp_reg must be supplied and must not be t0 or t1 -+ // Optional slow case is for implementations (interpreter and C1) which branch to -+ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. -+ // Returns offset of first potentially-faulting instruction for null -+ // check info (currently consumed only by C1). If -+ // swap_reg_contains_mark is true then returns -1 as it is assumed -+ // the calling code has already passed any potential faults. -+ int biased_locking_enter(Register lock_reg, Register obj_reg, -+ Register swap_reg, Register tmp_reg, -+ bool swap_reg_contains_mark, -+ Label& done, Label* slow_case = NULL, -+ BiasedLockingCounters* counters = NULL, -+ Register flag = noreg); -+ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); -+ + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + -+ //atomic -+ void atomic_incw(Register counter_addr, Register tmp1); -+ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { -+ la(tmp1, counter_addr); -+ atomic_incw(tmp1, tmp2); -+ } -+ + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. -+ void far_call(Address entry, Register tmp = t0); -+ void far_jump(Address entry, Register tmp = t0); ++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0); + + static int far_branch_size() { + if (far_branches()) { @@ -24864,8 +24886,8 @@ index 000000000..a4d5ce0e0 + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); -+ sub(t1, sp, offset); -+ sd(zr, Address(t1)); ++ sub(t0, sp, offset); ++ sd(zr, Address(t0)); + } + + void la_patchable(Register reg1, const Address &dest, int32_t &offset); @@ -24873,123 +24895,45 @@ index 000000000..a4d5ce0e0 + virtual void _call_Unimplemented(address call_site) { + mv(t1, call_site); + } -+ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) -+ -+#ifdef COMPILER2 -+ void spill(Register Rx, bool is64, int offset) { -+ is64 ? sd(Rx, Address(sp, offset)) -+ : sw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? fsd(Rx, Address(sp, offset)) -+ : fsw(Rx, Address(sp, offset)); -+ } -+ -+ void spill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vs1r_v(Vx, t0); -+ } -+ -+ void unspill(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lw(Rx, Address(sp, offset)); -+ } -+ -+ void unspillu(Register Rx, bool is64, int offset) { -+ is64 ? ld(Rx, Address(sp, offset)) -+ : lwu(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(FloatRegister Rx, bool is64, int offset) { -+ is64 ? 
fld(Rx, Address(sp, offset)) -+ : flw(Rx, Address(sp, offset)); -+ } -+ -+ void unspill(VectorRegister Vx, int offset) { -+ add(t0, sp, offset); -+ vl1r_v(Vx, t0); -+ } -+ -+ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, -+ int vec_reg_size_in_bytes) { -+ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); -+ unspill(v0, src_offset); -+ spill(v0, dst_offset); -+ } + -+#endif // COMPILER2 ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ + void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); + void read_polling_page(Register r, address page, relocInfo::relocType rtype); + void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); -+ // Return: the call PC -+ address trampoline_call(Address entry); ++ ++ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); + address ic_call(address entry, jint method_index = 0); -+ // Support for memory inc/dec -+ // n.b. increment/decrement calls with an Address destination will -+ // need to use a scratch register to load the value to be -+ // incremented. increment/decrement calls which add or subtract a -+ // constant value other than sign-extended 12-bit immediate will need -+ // to use a 2nd scratch register to hold the constant. so, an address -+ // increment/decrement may trash both t0 and t1. -+ -+ void increment(const Address dst, int64_t value = 1); -+ void incrementw(const Address dst, int32_t value = 1); -+ -+ void decrement(const Address dst, int64_t value = 1); -+ void decrementw(const Address dst, int32_t value = 1); -+ void cmpptr(Register src1, Address src2, Label& equal); -+ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop -+ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); -+ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); -+#ifdef COMPILER2 -+ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); + -+ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++ void add_memory_int64(const Address dst, int64_t imm); ++ void add_memory_int32(const Address dst, int32_t imm); + -+ void string_equals(Register a1, Register a2, Register result, Register cnt1, -+ int elem_size); -+ void string_compare(Register str1, Register str2, -+ Register cnt1, Register cnt2, Register result, -+ Register tmp1, Register tmp2, Register tmp3, int ae); -+ void string_indexof_char_short(Register str1, Register cnt1, -+ Register ch, Register result, -+ bool isL); -+ void string_indexof_char(Register str1, Register cnt1, -+ Register ch, Register result, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ bool isL); -+ void string_indexof(Register str1, Register str2, -+ Register cnt1, Register cnt2, -+ Register tmp1, Register tmp2, -+ Register tmp3, Register tmp4, -+ Register tmp5, Register tmp6, -+ Register result, int ae); -+ void string_indexof_linearscan(Register haystack, Register needle, -+ Register haystack_len, Register needle_len, -+ Register tmp1, Register tmp2, 
-+ Register tmp3, Register tmp4, -+ int needle_con_cnt, Register result, int ae); -+ void compute_index(Register str1, Register trailing_zero, Register match_mask, ++ void cmpptr(Register src1, Address src2, Label& equal); ++ ++ void compute_index(Register str1, Register trailing_zeros, Register match_mask, + Register result, Register char_tmp, Register tmp, + bool haystack_isL); + void compute_match_mask(Register src, Register pattern, Register match_mask, + Register mask1, Register mask2); ++ ++#ifdef COMPILER2 ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp); + void cad(Register dst, Register src1, Register src2, Register carry); + void cadc(Register dst, Register src1, Register src2, Register carry); + void adc(Register dst, Register src1, Register src2, Register carry); + void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, -+ Register src1, Register src2, Register carry = t0); -+ void mul_add(Register out, Register in, Register offset, -+ Register len, Register k, Register tmp1, Register tmp2); ++ Register src1, Register src2, Register carry); + void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, @@ -25008,49 +24952,21 @@ index 000000000..a4d5ce0e0 + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6, Register product_hi); -+#endif // COMPILER2 ++#endif ++ + void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); + void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); -+ ++ + void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1); -+ void zero_words(Register base, uint64_t cnt); ++ ++ void zero_words(Register base, u_int64_t cnt); + address zero_words(Register ptr, Register cnt); + void fill_words(Register base, Register cnt, Register value); -+ void zero_memory(Register addr, Register len, Register tmp1); ++ void zero_memory(Register addr, Register len, Register tmp); + -+ // shift left by shamt and add ++ // shift left by shamt and add + void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt); + -+#ifdef COMPILER2 -+ // refer to conditional_branches and float_conditional_branches -+ static const int bool_test_bits = 3; -+ static const int neg_cond_bits = 2; -+ static const int unsigned_branch_mask = 1 << bool_test_bits; -+ static const int double_branch_mask = 1 << bool_test_bits; -+ -+ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src); -+ -+ // cmp -+ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false); -+ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false); -+ -+ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false); -+ -+ // intrinsic methods implemented by vector instructions -+ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size); -+ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, -+ Register result, Register tmp1, Register tmp2, int encForm); -+ -+ void clear_array_v(Register base, 
Register cnt); -+ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp); -+ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp); -+ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp); -+ -+ address has_negatives_v(Register ary, Register len, Register result, Register tmp); -+#endif -+ + // Here the float instructions with safe deal with some exceptions. + // e.g. convert from NaN, +Inf, -Inf to int, float, double + // will trigger exception, we need to deal with these situations @@ -25138,13 +25054,15 @@ index 000000000..a4d5ce0e0 + // if [src1 < src2], dst = -1; + void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); + -+ void load_constant_pool_cache(Register cpool, Register method); ++ int push_fp(unsigned int bitset, Register stack); ++ int pop_fp(unsigned int bitset, Register stack); + -+ void load_max_stack(Register dst, Register method); ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); + +private: -+ void load_prototype_header(Register dst, Register src); -+ void repne_scan(Register addr, Register value, Register count, Register tmp); + +#ifdef ASSERT + // Macro short-hand support to clean-up after a failed call to trampoline @@ -25155,10 +25073,11 @@ index 000000000..a4d5ce0e0 +#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) +#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) +#endif ++ void repne_scan(Register addr, Register value, Register count, Register tmp); + + // Return true if an address is within the 48-bit RISCV64 address space. + bool is_valid_riscv64_address(address addr) { -+ // sv48: must have bits 63-48 all equal to bit 47 ++ // sv48: must have bits 63–48 all equal to bit 47 + return ((uintptr_t)addr >> 47) == 0; + } + @@ -25178,47 +25097,146 @@ index 000000000..a4d5ce0e0 + void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); + void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + -+#ifdef COMPILER2 -+ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, -+ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); -+#endif // COMPILER2 -+}; ++public: ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, ++ int ae); + -+#ifdef ASSERT -+inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -+#endif ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); + -+/** -+ * class SkipIfEqual: -+ * -+ * Instantiating this class will result in assembly code being output that will -+ * jump around any code emitted between the creation of the instance and it's -+ * automatic destruction at the end of a scope block, depending on the value of -+ * the flag passed to the constructor, which will be checked at run-time. 
-+ */ -+class SkipIfEqual { -+ private: -+ MacroAssembler* _masm; -+ Label _label; ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); + -+ public: -+ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); -+ ~SkipIfEqual(); -+}; -+#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP -diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -new file mode 100644 -index 000000000..fc2b191c0 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ ++ void arrays_equals(Register r1, Register r2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ void string_equals(Register r1, Register r2, ++ Register result, Register cnt1, ++ int elem_size); ++ ++ // refer to conditional_branches and float_conditional_branches ++ static const int bool_test_bits = 3; ++ static const int neg_cond_bits = 2; ++ static const int unsigned_branch_mask = 1 << bool_test_bits; ++ static const int double_branch_mask = 1 << bool_test_bits; ++ ++ // cmp ++ void cmp_branch(int cmpFlag, ++ Register op1, Register op2, ++ Label& label, bool is_far = false); ++ ++ void float_cmp_branch(int cmpFlag, ++ FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far = false); ++ ++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op, ++ Label& L, bool is_far = false); ++ ++ void enc_cmove(int cmpFlag, ++ Register op1, Register op2, ++ Register dst, Register src); ++ ++ void spill(Register r, bool is64, int offset) { ++ is64 ? sd(r, Address(sp, offset)) ++ : sw(r, Address(sp, offset)); ++ } ++ ++ void spill(FloatRegister f, bool is64, int offset) { ++ is64 ? fsd(f, Address(sp, offset)) ++ : fsw(f, Address(sp, offset)); ++ } ++ ++ void spill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(v, t0); ++ } ++ ++ void unspill(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lw(r, Address(sp, offset)); ++ } ++ ++ void unspillu(Register r, bool is64, int offset) { ++ is64 ? ld(r, Address(sp, offset)) ++ : lwu(r, Address(sp, offset)); ++ } ++ ++ void unspill(FloatRegister f, bool is64, int offset) { ++ is64 ? 
fld(f, Address(sp, offset)) ++ : flw(f, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister v, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(v, t0); ++ } ++ ++ void minmax_FD(FloatRegister dst, ++ FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min); ++ ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 0000000000..ef968ccd96 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * @@ -25241,17 +25259,19 @@ index 000000000..fc2b191c0 +#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP + ++// Still empty. ++ +#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp new file mode 100644 -index 000000000..d049193d4 +index 0000000000..fd907f77af --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp -@@ -0,0 +1,440 @@ +@@ -0,0 +1,450 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25280,9 +25300,11 @@ index 000000000..d049193d4 +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" + +#define __ _masm-> + @@ -25295,6 +25317,7 @@ index 000000000..d049193d4 +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ assert_cond(_masm != NULL); + if (VerifyMethodHandles) { + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); @@ -25316,6 +25339,7 @@ index 000000000..d049193d4 +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { ++ assert_cond(_masm != NULL); + InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = t1; @@ -25338,12 +25362,13 @@ index 000000000..d049193d4 + BLOCK_COMMENT("} verify_klass"); +} + -+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(method == xmethod, "interpreter calling convention"); + Label L_no_such_method; + __ beqz(xmethod, L_no_such_method); @@ -25374,6 +25399,7 @@ index 000000000..d049193d4 + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. @@ -25399,7 +25425,7 @@ index 000000000..d049193d4 + sizeof(u2), /*is_signed*/ false); + Label L; + __ ld(t0, __ argument_address(temp2, -1)); -+ __ oop_equal(recv, t0, L); ++ __ beq(recv, t0, L); + __ ld(x10, __ argument_address(temp2, -1)); + __ ebreak(); + __ BIND(L); @@ -25412,6 +25438,7 @@ index 000000000..d049193d4 +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { ++ assert_cond(_masm != NULL); + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || @@ -25427,6 +25454,7 @@ index 000000000..d049193d4 + // xmethod: Method* + // x13: argument locator (parameter slot count, added to sp) + // x11: used as temp to hold mh or receiver ++ // x10, x29: garbage temps, blown away + Register argp = x13; // argument list ptr, live on error paths + Register mh = x11; // MH receiver; dies quickly and is recycled + @@ -25476,7 +25504,6 @@ index 000000000..d049193d4 + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); -+ + } else { + // Adjust argument list by popping the trailing MemberName argument. 
+ Register recv = noreg; @@ -25499,6 +25526,7 @@ index 000000000..d049193d4 + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { ++ assert_cond(_masm != NULL); + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register temp1 = x7; @@ -25592,7 +25620,8 @@ index 000000000..d049193d4 + __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); + break; + -+ case vmIntrinsics::_linkToVirtual: { ++ case vmIntrinsics::_linkToVirtual: ++ { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + @@ -25619,7 +25648,8 @@ index 000000000..d049193d4 + break; + } + -+ case vmIntrinsics::_linkToInterface: { ++ case vmIntrinsics::_linkToInterface: ++ { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { @@ -25671,7 +25701,7 @@ index 000000000..d049193d4 + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, -+ oop mh, ++ oopDesc* mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + @@ -25690,13 +25720,12 @@ index 000000000..d049193d4 +#endif //PRODUCT diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp new file mode 100644 -index 000000000..8ed69efe8 +index 0000000000..65493eba76 --- /dev/null +++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp -@@ -0,0 +1,58 @@ +@@ -0,0 +1,57 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25754,13 +25783,13 @@ index 000000000..8ed69efe8 + bool for_compiler_entry); diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp new file mode 100644 -index 000000000..4b1573130 +index 0000000000..27011ad128 --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp -@@ -0,0 +1,404 @@ +@@ -0,0 +1,417 @@ +/* -+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -25786,10 +25815,12 @@ index 000000000..4b1573130 + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" ++#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" ++#include "runtime/orderAccess.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" @@ -25981,24 +26012,20 @@ index 000000000..4b1573130 + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. 
+ CodeBlob* cb = CodeCache::find_blob(instruction_address()); -+ if(cb != NULL) { -+ nmethod* nm = cb->as_nmethod_or_null(); -+ if (nm != NULL) { -+ RelocIterator iter(nm, instruction_address(), next_instruction_address()); -+ while (iter.next()) { -+ if (iter.type() == relocInfo::oop_type) { -+ oop* oop_addr = iter.oop_reloc()->oop_addr(); -+ *oop_addr = cast_to_oop(x); -+ break; -+ } else if (iter.type() == relocInfo::metadata_type) { -+ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); -+ *metadata_addr = (Metadata*)x; -+ break; -+ } ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; + } + } -+ } else { -+ ShouldNotReachHere(); + } +} + @@ -26040,6 +26067,16 @@ index 000000000..4b1573130 + return dest; +}; + ++void NativeJump::set_jump_destination(address dest) { ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ if (dest == (address) -1) ++ dest = instruction_address(); ++ ++ MacroAssembler::pd_patch_instruction(instruction_address(), dest); ++ ICache::invalidate_range(instruction_address(), instruction_size); ++} ++ +//------------------------------------------------------------------- + +address NativeGeneralJump::jump_destination() const { @@ -26061,6 +26098,7 @@ index 000000000..4b1573130 +} + +bool NativeInstruction::is_lwu_to_zr(address instr) { ++ assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0000011 && + extract_funct3(instr) == 0b110 && + extract_rd(instr) == zr); // zr @@ -26077,6 +26115,10 @@ index 000000000..4b1573130 + *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction +} + ++bool NativeInstruction::is_stop() { ++ return uint_at(0) == 0xffffffff; // an illegal instruction ++} ++ +//------------------------------------------------------------------- + +// MT-safe inserting of a jump over a jump or a nop (used by @@ -26164,14 +26206,14 @@ index 000000000..4b1573130 +} diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp new file mode 100644 -index 000000000..e8a4e0a46 +index 0000000000..2e5c84ee3b --- /dev/null +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp -@@ -0,0 +1,561 @@ +@@ -0,0 +1,555 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -26215,12 +26257,15 @@ index 000000000..e8a4e0a46 +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
+ ++class NativeCall; ++ +class NativeInstruction { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { -+ instruction_size = 4 ++ instruction_size = 4, ++ compressed_instruction_size = 2, + }; + + juint encoding() const { @@ -26246,7 +26291,7 @@ index 000000000..e8a4e0a46 + static bool is_slli_shift_at(address instr, uint32_t shift) { + assert_cond(instr != NULL); + return (extract_opcode(instr) == 0b0010011 && // opcode field -+ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation + Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field + } + @@ -26341,9 +26386,10 @@ index 000000000..e8a4e0a46 + // load + static bool check_load_pc_relative_data_dependency(address instr) { + address auipc = instr; -+ address last_instr = auipc + instruction_size; ++ address load = auipc + instruction_size; + -+ return extract_rs1(last_instr) == extract_rd(auipc); ++ return extract_rd(load) == extract_rd(auipc) && ++ extract_rs1(load) == extract_rd(load); + } + + static bool is_movptr_at(address instr); @@ -26364,6 +26410,7 @@ index 000000000..e8a4e0a46 + inline bool is_jump_or_nop(); + bool is_safepoint_poll(); + bool is_sigill_zombie_not_entrant(); ++ bool is_stop(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } @@ -26422,27 +26469,24 @@ index 000000000..e8a4e0a46 + address return_address() const { return addr_at(return_address_offset); } + address destination() const; + -+ void set_destination(address dest) { -+ if (is_jal()) { -+ intptr_t offset = (intptr_t)(dest - instruction_address()); -+ assert((offset & 0x1) == 0, "should be aligned"); -+ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); -+ unsigned int insn = 0b1101111; // jal -+ address pInsn = (address)(&insn); -+ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); -+ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); -+ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); -+ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); -+ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra -+ set_int_at(displacement_offset, insn); -+ return; -+ } -+ ShouldNotReachHere(); ++ void set_destination(address dest) { ++ assert(is_jal(), "Should be jal instruction!"); ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "bad alignment"); ++ assert(is_imm_in_range(offset, 20, 1), "encoding constraint"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); + } + -+ void verify_alignment() { ; } -+ void verify(); -+ void print(); ++ void verify_alignment() {} // do nothing on riscv ++ void verify(); ++ void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address addr); @@ -26478,18 +26522,14 @@ index 000000000..e8a4e0a46 +inline NativeCall* nativeCall_at(address addr) { + assert_cond(addr != NULL); + NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif 
++ DEBUG_ONLY(call->verify()); + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + assert_cond(return_address != NULL); + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); -+#ifdef ASSERT -+ call->verify(); -+#endif ++ DEBUG_ONLY(call->verify()); + return call; +} + @@ -26529,7 +26569,7 @@ index 000000000..e8a4e0a46 + } + + intptr_t data() const; -+ void set_data(intptr_t x); ++ void set_data(intptr_t x); + + void flush() { + if (!maybe_cpool_ref(instruction_address())) { @@ -26537,8 +26577,8 @@ index 000000000..e8a4e0a46 + } + } + -+ void verify(); -+ void print(); ++ void verify(); ++ void print(); + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); @@ -26548,55 +26588,53 @@ index 000000000..e8a4e0a46 +inline NativeMovConstReg* nativeMovConstReg_at(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif ++ DEBUG_ONLY(test->verify()); + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address addr) { + assert_cond(addr != NULL); + NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); -+#ifdef ASSERT -+ test->verify(); -+#endif ++ DEBUG_ONLY(test->verify()); + return test; +} + -+// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. ++// RISCV should not use C1 runtime patching, but still implement ++// NativeMovRegMem to keep some compilers happy. +class NativeMovRegMem: public NativeInstruction { + public: -+ int instruction_start() const { -+ Unimplemented(); -+ return 0; -+ } ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; + -+ address instruction_address() const { -+ Unimplemented(); -+ return NULL; -+ } ++ int instruction_start() const { return instruction_offset; } + -+ int num_bytes_to_end_of_patch() const { -+ Unimplemented(); -+ return 0; -+ } ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + + int offset() const; + + void set_offset(int x); + -+ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++ void add_offset_in_bytes(int add_offset) { ++ set_offset(offset() + add_offset); ++ } + + void verify(); + void print(); + + private: -+ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); ++ inline friend NativeMovRegMem* nativeMovRegMem_at(address addr); +}; + -+inline NativeMovRegMem* nativeMovRegMem_at (address addr) { -+ Unimplemented(); -+ return NULL; ++inline NativeMovRegMem* nativeMovRegMem_at(address addr) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(addr - NativeMovRegMem::instruction_offset); ++ DEBUG_ONLY(test->verify()); ++ return test; +} + +class NativeJump: public NativeInstruction { @@ -26611,15 +26649,13 @@ index 000000000..e8a4e0a46 + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { return addr_at(instruction_size); } + address jump_destination() const; ++ void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + -+ // Unit testing stuff -+ 
static void test() {} -+ + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry @@ -26629,9 +26665,7 @@ index 000000000..e8a4e0a46 + +inline NativeJump* nativeJump_at(address addr) { + NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); -+#ifdef ASSERT -+ jump->verify(); -+#endif ++ DEBUG_ONLY(jump->verify()); + return jump; +} + @@ -26699,7 +26733,9 @@ index 000000000..e8a4e0a46 + // 3). check if the offset in ld[31:20] equals the data_offset + assert_cond(addr != NULL); + const int instr_size = NativeInstruction::instruction_size; -+ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ if (NativeInstruction::is_auipc_at(addr) && ++ NativeInstruction::is_ld_at(addr + instr_size) && ++ NativeInstruction::is_jalr_at(addr + 2 * instr_size) && + (NativeInstruction::extract_rd(addr) == x5) && + (NativeInstruction::extract_rd(addr + instr_size) == x5) && + (NativeInstruction::extract_rs1(addr + instr_size) == x5) && @@ -26731,13 +26767,12 @@ index 000000000..e8a4e0a46 +#endif // CPU_RISCV_NATIVEINST_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp new file mode 100644 -index 000000000..04a36c1c7 +index 0000000000..fef8ca9b64 --- /dev/null +++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp -@@ -0,0 +1,46 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -26770,10 +26805,7 @@ index 000000000..04a36c1c7 + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. -+ // Since there is none, we just return NULL. -+ // See registerMap_riscv.hpp for an example of grabbing registers -+ // from register save areas of a standard layout. -+ address pd_location(VMReg reg) const {return NULL;} ++ address pd_location(VMReg reg) const { return NULL; } + + // no PD state to clear or copy: + void pd_clear() {} @@ -26783,14 +26815,14 @@ index 000000000..04a36c1c7 +#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp new file mode 100644 -index 000000000..b30c1b107 +index 0000000000..583f67573c --- /dev/null +++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp -@@ -0,0 +1,193 @@ +@@ -0,0 +1,192 @@ +/* + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -26815,7 +26847,6 @@ index 000000000..b30c1b107 + +#include "precompiled.hpp" +#include "asm/assembler.hpp" -+#include "asm/macroAssembler.inline.hpp" +#include "asm/register.hpp" +#include "interp_masm_riscv.hpp" +#include "register_riscv.hpp" @@ -26982,14 +27013,13 @@ index 000000000..b30c1b107 +REGISTER_DEFINITION(Register, t2); diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp new file mode 100644 -index 000000000..76215ef2a +index 0000000000..ef60cb3bb0 --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.cpp -@@ -0,0 +1,69 @@ +@@ -0,0 +1,64 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27017,18 +27047,14 @@ index 000000000..76215ef2a + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * + RegisterImpl::max_slots_per_register; ++ +const int ConcreteRegisterImpl::max_fpr = + ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + -+const int ConcreteRegisterImpl::max_vpr = -+ ConcreteRegisterImpl::max_fpr + -+ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; -+ -+ +const char* RegisterImpl::name() const { -+ const char* names[number_of_registers] = { -+ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", ++ static const char *const names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "fp", "x9", + "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", + "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", + "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" @@ -27037,7 +27063,7 @@ index 000000000..76215ef2a +} + +const char* FloatRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", @@ -27047,7 +27073,7 @@ index 000000000..76215ef2a +} + +const char* VectorRegisterImpl::name() const { -+ const char* names[number_of_registers] = { ++ static const char *const names[number_of_registers] = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -27057,13 +27083,12 @@ index 000000000..76215ef2a +} diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp new file mode 100644 -index 000000000..8beba6776 +index 0000000000..f64a06eb89 --- /dev/null +++ b/src/hotspot/cpu/riscv/register_riscv.hpp -@@ -0,0 +1,337 @@ +@@ -0,0 +1,381 @@ +/* -+ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27121,31 +27146,49 @@ index 000000000..8beba6776 + public: + enum { + number_of_registers = 32, -+ number_of_byte_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // integer registers x8 - x15 and floating-point registers f8 - f15 are allocatable ++ // for compressed instructions. See Table 17.2 in spec. ++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // derived registers, offsets, and addresses -+ Register successor() const { return as_Register(encoding() + 1); } ++ const Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } -+ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; -+ int encoding_nocheck() const { return (intptr_t)this; } ++ ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } + + // Return the bit which represents this register. This is intended + // to be ORed into a bitmask: for usage see class RegSet below. -+ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++ uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } +}; + -+// The integer registers of the riscv architecture ++// The integer registers of the RISCV architecture + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + @@ -27195,23 +27238,41 @@ index 000000000..8beba6776 + public: + enum { + number_of_registers = 32, -+ max_slots_per_register = 2 ++ max_slots_per_register = 2, ++ ++ // float registers in the range of [f8~f15] correspond to RVC. Please see Table 16.2 in spec. 
++ compressed_register_base = 8, ++ compressed_register_top = 15, + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses -+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ int is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + ++ // for rvc ++ int compressed_encoding() const { ++ assert(is_compressed_valid(), "invalid compressed register"); ++ return encoding() - compressed_register_base; ++ } ++ ++ int compressed_encoding_nocheck() const { ++ return encoding_nocheck() - compressed_register_base; ++ } ++ ++ bool is_compressed_valid() const { ++ return encoding_nocheck() >= compressed_register_base && ++ encoding_nocheck() <= compressed_register_top; ++ } +}; + +// The float registers of the RISCV architecture @@ -27259,7 +27320,7 @@ index 000000000..8beba6776 + return (VectorRegister)(intptr_t) encoding; +} + -+// The implementation of vector registers for riscv-v ++// The implementation of vector registers for RVV +class VectorRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -27270,15 +27331,15 @@ index 000000000..8beba6776 + // construction + inline friend VectorRegister as_VectorRegister(int encoding); + -+ VMReg as_VMReg(); ++ VMReg as_VMReg() const; + + // derived registers, offsets, and addresses + VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } + + // accessors -+ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } -+ int encoding_nocheck() const { return (intptr_t)this; } -+ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ int encoding() const { assert(is_valid(), "invalid register"); return encoding_nocheck(); } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return (unsigned)encoding_nocheck() < number_of_registers; } + const char* name() const; + +}; @@ -27331,29 +27392,26 @@ index 000000000..8beba6776 + // it's optoregs. 
+ + number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + -+ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + -+ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; -+ static const int max_vpr; +}; + +// A set of registers +class RegSet { + uint32_t _bitset; + -+public: + RegSet(uint32_t bitset) : _bitset(bitset) { } + ++public: ++ + RegSet() : _bitset(0) { } + + RegSet(Register r1) : _bitset(r1->bit()) { } + -+ ~RegSet() {} -+ + RegSet operator+(const RegSet aSet) const { + RegSet result(_bitset | aSet._bitset); + return result; @@ -27369,6 +27427,11 @@ index 000000000..8beba6776 + return *this; + } + ++ RegSet &operator-=(const RegSet aSet) { ++ *this = *this - aSet; ++ return *this; ++ } ++ + static RegSet of(Register r1) { + return RegSet(r1); + } @@ -27388,26 +27451,33 @@ index 000000000..8beba6776 + static RegSet range(Register start, Register end) { + uint32_t bits = ~0; + bits <<= start->encoding(); -+ bits <<= (31 - end->encoding()); -+ bits >>= (31 - end->encoding()); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); + + return RegSet(bits); + } + + uint32_t bits() const { return _bitset; } ++ ++private: ++ ++ Register first() { ++ uint32_t first = _bitset & -_bitset; ++ return first ? as_Register(exact_log2(first)) : noreg; ++ } +}; + +#endif // CPU_RISCV_REGISTER_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp new file mode 100644 -index 000000000..f49fd6439 +index 0000000000..047ea2276c --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp -@@ -0,0 +1,113 @@ +@@ -0,0 +1,112 @@ +/* -+ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -27444,10 +27514,9 @@ index 000000000..f49fd6439 + + int bytes; + -+ switch(type()) { ++ switch (type()) { + case relocInfo::oop_type: { + oop_Relocation *reloc = (oop_Relocation *)this; -+ // in movoop when immediate == false + if (NativeInstruction::is_load_pc_relative_at(addr())) { + address constptr = (address)code()->oop_addr_at(reloc->oop_index()); + bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); @@ -27519,13 +27588,12 @@ index 000000000..f49fd6439 +} diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp new file mode 100644 -index 000000000..c30150e0a +index 0000000000..840ed935d8 --- /dev/null +++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp -@@ -0,0 +1,45 @@ +@@ -0,0 +1,44 @@ +/* -+ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -27557,8 +27625,8 @@ index 000000000..c30150e0a + enum { + // Relocations are byte-aligned. + offset_unit = 1, -+ // We don't use format(). -+ format_width = 0 ++ // Must be at least 1 for RelocInfo::narrow_oop_in_const. ++ format_width = 1 + }; + + public: @@ -27570,14 +27638,14 @@ index 000000000..c30150e0a +#endif // CPU_RISCV_RELOCINFO_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad new file mode 100644 -index 000000000..137e9b7c7 +index 0000000000..02d6167629 --- /dev/null +++ b/src/hotspot/cpu/riscv/riscv.ad -@@ -0,0 +1,10685 @@ +@@ -0,0 +1,10280 @@ +// -+// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++// Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it @@ -27644,8 +27712,8 @@ index 000000000..137e9b7c7 +// +// follow the C1 compiler in making registers +// -+// x7, x9-x17, x28-x31 volatile (caller save) -+// x0-x4, x8, x27 system (no save, no allocate) ++// x7, x9-x17, x27-x31 volatile (caller save) ++// x0-x4, x8, x23 system (no save, no allocate) +// x5-x6 non-allocatable (so we can use them as temporary regs) + +// @@ -27658,8 +27726,8 @@ index 000000000..137e9b7c7 + +reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr +reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); -+reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra -+reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R1 ( NS, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( NS, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); +reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp +reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); +reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp @@ -27803,179 +27871,6 @@ index 000000000..137e9b7c7 +reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); + +// ---------------------------- -+// Vector Registers -+// ---------------------------- -+ -+// For RVV vector registers, we simply extend vector register size to 4 -+// 'logical' slots. This is nominally 128 bits but it actually covers -+// all possible 'physical' RVV vector register lengths from 128 ~ 1024 -+// bits. The 'physical' RVV vector register length is detected during -+// startup, so the register allocator is able to identify the correct -+// number of bytes needed for an RVV spill/unspill. -+// for Java use vector registers v0-v31 are always save on call just -+// as the platform ABI treats v0-v31 as caller save. 
-+ -+reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); -+reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); -+reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); -+reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); -+ -+reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); -+reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); -+reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); -+reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); -+ -+reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); -+reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); -+reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); -+reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); -+ -+reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); -+reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); -+reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); -+reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); -+ -+reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); -+reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); -+reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); -+reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); -+ -+reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); -+reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); -+reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); -+reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); -+ -+reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); -+reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); -+reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); -+reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); -+ -+reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); -+reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); -+reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); -+reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); -+ -+reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); -+reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); -+reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); -+reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); -+ -+reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); -+reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); -+reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); -+reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); -+ -+reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); -+reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); -+reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); -+reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); -+ -+reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); -+reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); -+reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); -+reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); -+ -+reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); -+reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); -+reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); -+reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); -+ -+reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); -+reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); -+reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
-+reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); -+ -+reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); -+reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); -+reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); -+reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); -+ -+reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); -+reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); -+reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); -+reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); -+ -+reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); -+reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); -+reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); -+reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); -+ -+reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); -+reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); -+reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); -+reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); -+ -+reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); -+reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); -+reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); -+reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); -+ -+reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); -+reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); -+reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); -+reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); -+ -+reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); -+reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); -+reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); -+reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); -+ -+reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); -+reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); -+reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); -+reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); -+ -+reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); -+reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); -+reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); -+reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); -+ -+reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); -+reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); -+reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); -+reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); -+ -+reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); -+reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); -+reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); -+reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); -+ -+reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); -+reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); -+reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); -+reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); -+ -+reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); -+reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); -+reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); -+reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); -+ -+reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); -+reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); -+reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); -+reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); -+ -+reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); -+reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); -+reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); -+reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); -+ -+reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); -+reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); -+reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); -+reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); -+ -+reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); -+reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); -+reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); -+reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); -+ -+reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); -+reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); -+reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); -+reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); -+ -+// ---------------------------- +// Special Registers +// ---------------------------- + @@ -28073,49 +27968,14 @@ index 000000000..137e9b7c7 + F27, F27_H, +); + -+alloc_class chunk2( -+ V0, V0_H, V0_J, V0_K, -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K, -+); -+ -+alloc_class chunk3(RFLAGS); ++alloc_class chunk2(RFLAGS); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. 
+// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -+// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) -+// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + @@ -28323,7 +28183,7 @@ index 000000000..137e9b7c7 +); + +// Class for link register -+reg_class lr_reg( ++reg_class ra_reg( + R1, R1_H +); + @@ -28406,41 +28266,6 @@ index 000000000..137e9b7c7 + F31, F31_H +); + -+// Class for all RVV vector registers -+reg_class vectora_reg( -+ V1, V1_H, V1_J, V1_K, -+ V2, V2_H, V2_J, V2_K, -+ V3, V3_H, V3_J, V3_K, -+ V4, V4_H, V4_J, V4_K, -+ V5, V5_H, V5_J, V5_K, -+ V6, V6_H, V6_J, V6_K, -+ V7, V7_H, V7_J, V7_K, -+ V8, V8_H, V8_J, V8_K, -+ V9, V9_H, V9_J, V9_K, -+ V10, V10_H, V10_J, V10_K, -+ V11, V11_H, V11_J, V11_K, -+ V12, V12_H, V12_J, V12_K, -+ V13, V13_H, V13_J, V13_K, -+ V14, V14_H, V14_J, V14_K, -+ V15, V15_H, V15_J, V15_K, -+ V16, V16_H, V16_J, V16_K, -+ V17, V17_H, V17_J, V17_K, -+ V18, V18_H, V18_J, V18_K, -+ V19, V19_H, V19_J, V19_K, -+ V20, V20_H, V20_J, V20_K, -+ V21, V21_H, V21_J, V21_K, -+ V22, V22_H, V22_J, V22_K, -+ V23, V23_H, V23_J, V23_K, -+ V24, V24_H, V24_J, V24_K, -+ V25, V25_H, V25_J, V25_K, -+ V26, V26_H, V26_J, V26_K, -+ V27, V27_H, V27_J, V27_K, -+ V28, V28_H, V28_J, V28_K, -+ V29, V29_H, V29_J, V29_K, -+ V30, V30_H, V30_J, V30_K, -+ V31, V31_H, V31_J, V31_K -+); -+ +// Class for 64 bit register f0 +reg_class f0_reg( + F0, F0_H @@ -28461,31 +28286,6 @@ index 000000000..137e9b7c7 + F3, F3_H +); + -+// class for vector register v1 -+reg_class v1_reg( -+ V1, V1_H, V1_J, V1_K -+); -+ -+// class for vector register v2 -+reg_class v2_reg( -+ V2, V2_H, V2_J, V2_K -+); -+ -+// class for vector register v3 -+reg_class v3_reg( -+ V3, V3_H, V3_J, V3_K -+); -+ -+// class for vector register v4 -+reg_class v4_reg( -+ V4, V4_H, V4_J, V4_K -+); -+ -+// class for vector register v5 -+reg_class v5_reg( -+ V5, V5_H, V5_J, V5_K -+); -+ +// class for condition codes +reg_class reg_flags(RFLAGS); +%} @@ -28516,7 +28316,7 @@ index 000000000..137e9b7c7 + int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload + int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore + int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp -+ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call ++ int_def BRANCH_COST ( 200, 2 * DEFAULT_COST); // branch, jmp, call + int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul + int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi + int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi @@ -28524,6 +28324,7 @@ index 000000000..137e9b7c7 + int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd + int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv + int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++ int_def VOLATILE_REF_COST ( 1000, 10 * DEFAULT_COST); +%} + + @@ -28535,7 +28336,6 @@ index 000000000..137e9b7c7 +source_hpp %{ + +#include "asm/macroAssembler.hpp" -+#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" @@ -28584,85 +28384,62 @@ index 000000000..137e9b7c7 + } +}; + -+// predicate controlling translation of StoreCM -+bool unnecessary_storestore(const Node *storecm); -+ 
+bool is_CAS(int opcode, bool maybe_volatile); + +// predicate controlling translation of CompareAndSwapX -+bool needs_acquiring_load_exclusive(const Node *load); ++bool needs_acquiring_load_reserved(const Node *load); + ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); + -+// predicate using the temp register for decoding klass -+bool maybe_use_tmp_register_decoding_klass(); ++// predicate controlling addressing modes ++bool size_fits_all_mem_uses(AddPNode* addp, int shift); +%} + +source %{ + -+ // Derived RegMask with conditionally allocatable registers ++// Derived RegMask with conditionally allocatable registers + -+ RegMask _ANY_REG32_mask; -+ RegMask _ANY_REG_mask; -+ RegMask _PTR_REG_mask; -+ RegMask _NO_SPECIAL_REG32_mask; -+ RegMask _NO_SPECIAL_REG_mask; -+ RegMask _NO_SPECIAL_PTR_REG_mask; ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++RegMask _NO_SPECIAL_REG32_mask; ++RegMask _NO_SPECIAL_REG_mask; ++RegMask _NO_SPECIAL_PTR_REG_mask; + -+ void reg_mask_init() { ++void reg_mask_init() { + -+ _ANY_REG32_mask = _ALL_REG32_mask; -+ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); + -+ _ANY_REG_mask = _ALL_REG_mask; -+ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _PTR_REG_mask = _ALL_REG_mask; -+ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); + -+ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; -+ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); + -+ _NO_SPECIAL_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); + -+ // x27 is not allocatable when compressed oops is on -+ if (UseCompressedOops) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); -+ } -+ -+ // x8 is not allocatable when PreserveFramePointer is on -+ if (PreserveFramePointer) { -+ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); -+ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); -+ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); -+ } ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); + } + -+ -+// predicate controlling translation of StoreCM -+// -+// returns true if a StoreStore must precede the card write otherwise -+// false -+bool unnecessary_storestore(const Node *storecm) -+{ -+ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); -+ -+ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) -+ // between an object put and the associated card mark when we are using -+ // CMS without 
conditional card marking -+ -+ if (UseConcMarkSweepGC && !UseCondCardMark) { -+ return false; ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); + } -+ -+ // a storestore is unnecesary in all other cases -+ -+ return true; +} + +// is_CAS(int opcode, bool maybe_volatile) @@ -28671,12 +28448,16 @@ index 000000000..137e9b7c7 +// values otherwise false. +bool is_CAS(int opcode, bool maybe_volatile) +{ -+ switch(opcode) { ++ switch (opcode) { + // We handle these + case Op_CompareAndSwapI: + case Op_CompareAndSwapL: + case Op_CompareAndSwapP: + case Op_CompareAndSwapN: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif + case Op_CompareAndSwapB: + case Op_CompareAndSwapS: + case Op_GetAndSetI: @@ -28685,10 +28466,6 @@ index 000000000..137e9b7c7 + case Op_GetAndSetN: + case Op_GetAndAddI: + case Op_GetAndAddL: -+#if INCLUDE_SHENANDOAHGC -+ case Op_ShenandoahCompareAndSwapP: -+ case Op_ShenandoahCompareAndSwapN: -+#endif + return true; + case Op_CompareAndExchangeI: + case Op_CompareAndExchangeN: @@ -28698,7 +28475,7 @@ index 000000000..137e9b7c7 + case Op_CompareAndExchangeP: + case Op_WeakCompareAndSwapB: + case Op_WeakCompareAndSwapS: -+ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapI: + case Op_WeakCompareAndSwapL: + case Op_WeakCompareAndSwapP: + case Op_WeakCompareAndSwapN: @@ -28711,12 +28488,9 @@ index 000000000..137e9b7c7 +// predicate controlling translation of CAS +// +// returns true if CAS needs to use an acquiring load otherwise false -+bool needs_acquiring_load_exclusive(const Node *n) ++bool needs_acquiring_load_reserved(const Node *n) +{ + assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); -+ if (UseBarriersForVolatile) { -+ return false; -+ } + + LoadStoreNode* ldst = n->as_LoadStore(); + if (n != NULL && is_CAS(n->Opcode(), false)) { @@ -28728,11 +28502,28 @@ index 000000000..137e9b7c7 + return true; +} + -+bool maybe_use_tmp_register_decoding_klass() { -+ return !UseCompressedOops && -+ Universe::narrow_klass_base() != NULL && -+ Universe::narrow_klass_shift() != 0; ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; +} ++ +#define __ _masm. 
+ +// advance declarations for helper functions to convert register @@ -28751,14 +28542,13 @@ index 000000000..137e9b7c7 + +int MachCallStaticJavaNode::ret_addr_offset() +{ -+ // call should be a simple jal -+ int off = 4; -+ return off; ++ // jal ++ return 1 * NativeInstruction::instruction_size; +} + +int MachCallDynamicJavaNode::ret_addr_offset() +{ -+ return 28; // movptr, jal ++ return 7 * NativeInstruction::instruction_size; // movptr, jal +} + +int MachCallRuntimeNode::ret_addr_offset() { @@ -28766,14 +28556,13 @@ index 000000000..137e9b7c7 + // jal(addr) + // or with far branches + // jal(trampoline_stub) -+ // for real runtime callouts it will be five instructions ++ // for real runtime callouts it will be 11 instructions + // see riscv_enc_java_to_runtime -+ // la(t1, retaddr) -+ // la(t0, RuntimeAddress(addr)) -+ // addi(sp, sp, -2 * wordSize) -+ // sd(zr, Address(sp)) -+ // sd(t1, Address(sp, wordSize)) -+ // jalr(t0) ++ // la(t1, retaddr) -> auipc + addi ++ // la(t0, RuntimeAddress(addr)) -> lui + addi + slli + addi + slli + addi ++ // addi(sp, sp, -2 * wordSize) -> addi ++ // sd(t1, Address(sp, wordSize)) -> sd ++ // jalr(t0) -> jalr + CodeBlob *cb = CodeCache::find_blob(_entry_point); + if (cb != NULL) { + return 1 * NativeInstruction::instruction_size; @@ -28782,6 +28571,34 @@ index 000000000..137e9b7c7 + } +} + ++// ++// Compute padding required for nodes which need alignment ++// ++ ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} ++ ++// With RVC a call instruction may get 2-byte aligned. ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++ // skip the movptr in MacroAssembler::ic_call(): ++ // lui + addi + slli + addi + slli + addi ++ // Though movptr() has already 4-byte aligned with or without RVC, ++ // We need to prevent from further changes by explicitly calculating the size. ++ const int movptr_size = 6 * NativeInstruction::instruction_size; ++ current_offset += movptr_size; ++ // to make sure the address of jal 4-byte aligned. ++ return align_up(current_offset, alignment_required()) - current_offset; ++} ++ +// Indicate if the safepoint node needs the polling page as an input + +// the shared code plants the oop data at the start of the generated @@ -28807,6 +28624,7 @@ index 000000000..137e9b7c7 + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + __ ebreak(); +} + @@ -28824,13 +28642,14 @@ index 000000000..137e9b7c7 + + void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); // nops shall be 2-byte under RVC for alignment purposes. + for (int i = 0; i < _count; i++) { + __ nop(); + } + } + + uint MachNopNode::size(PhaseRegAlloc*) const { -+ return _count * NativeInstruction::instruction_size; ++ return _count * (UseRVC ? 
NativeInstruction::compressed_instruction_size : NativeInstruction::instruction_size); + } + +//============================================================================= @@ -28871,10 +28690,10 @@ index 000000000..137e9b7c7 + st->print("# stack bang size=%d\n\t", framesize); + } + ++ st->print("sd fp, [sp, #%d]\n\t", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]\n\t", - wordSize); ++ if (PreserveFramePointer) { st->print("sub fp, sp, #%d\n\t", 2 * wordSize); } + st->print("sub sp, sp, #%d\n\t", framesize); -+ st->print("sd fp, [sp, #%d]", - 2 * wordSize); -+ st->print("sd ra, [sp, #%d]", - wordSize); -+ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } +} +#endif + @@ -28885,15 +28704,15 @@ index 000000000..137e9b7c7 + + // n.b. frame size includes space for return pc and fp + const int framesize = C->frame_size_in_bytes(); -+ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + assert_cond(C != NULL); ++ + int bangsize = C->bang_size_in_bytes(); -+ if (C->need_stack_bang(bangsize) && UseStackBanging) { ++ if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + @@ -28949,7 +28768,7 @@ index 000000000..137e9b7c7 + if (do_polling() && C->is_method_compilation()) { + st->print("# touch polling page\n\t"); + st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); -+ st->print("ld zr, [t0]"); ++ st->print("ld zr, [t0]"); + } +} +#endif @@ -28986,6 +28805,9 @@ index 000000000..137e9b7c7 + return MachNode::pipeline_class(); +} + ++// This method seems to be obsolete. It is declared in machnode.hpp ++// and defined in all *.ad files, but it is never called. Should we ++// get rid of it? +int MachEpilogNode::safepoint_offset() const { + assert(do_polling(), "no return for this epilog node"); + return 4; @@ -28995,7 +28817,7 @@ index 000000000..137e9b7c7 + +// Figure out which register class each belongs in: rc_int, rc_float or +// rc_stack. -+enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; + +static enum RC rc_class(OptoReg::Name reg) { + @@ -29016,13 +28838,7 @@ index 000000000..137e9b7c7 + return rc_float; + } + -+ // we have 32 vector register * 4 halves -+ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; -+ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { -+ return rc_vector; -+ } -+ -+ // Between vector regs & stack is the flags regs. ++ // Between float regs & stack is the flags regs. 
+ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); + + return rc_stack; @@ -29060,32 +28876,9 @@ index 000000000..137e9b7c7 + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + -+ if (bottom_type() == NULL) { -+ ShouldNotReachHere(); -+ } else if (bottom_type()->isa_vect() != NULL) { -+ uint ireg = ideal_reg(); -+ if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); -+ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack to stack -+ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, -+ vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { -+ // vpr to stack -+ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { -+ // stack to vpr -+ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); -+ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { -+ // vpr to vpr -+ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } -+ } else if (cbuf != NULL) { ++ if (cbuf != NULL) { + MacroAssembler _masm(cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + switch (src_lo_rc) { + case rc_int: + if (dst_lo_rc == rc_int) { // gpr --> gpr copy @@ -29167,17 +28960,7 @@ index 000000000..137e9b7c7 + } else { + st->print("%s", Matcher::regName[dst_lo]); + } -+ if (bottom_type()->isa_vect() != NULL) { -+ int vsize = 0; -+ if (ideal_reg() == Op_VecA) { -+ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; -+ } else { -+ ShouldNotReachHere(); -+ } -+ st->print("\t# vector spill size = %d", vsize); -+ } else { -+ st->print("\t# spill size = %d", is64 ? 64 : 32); -+ } ++ st->print("\t# spill size = %d", is64 ? 64 : 32); + } + + return 0; @@ -29249,14 +29032,16 @@ index 000000000..137e9b7c7 + assert_cond(st != NULL); + st->print_cr("# MachUEPNode"); + if (UseCompressedClassPointers) { -+ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tlwu t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + if (Universe::narrow_klass_shift() != 0) { + st->print_cr("\tdecode_klass_not_null t0, t0"); + } + } else { -+ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); + } -+ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tbeq t0, t1, ic_hit"); ++ st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++ st->print_cr("\tic_hit:"); +} +#endif + @@ -29334,15 +29119,10 @@ index 000000000..137e9b7c7 + } + + switch (opcode) { -+ case Op_StrCompressedCopy: // fall through -+ case Op_StrInflatedCopy: // fall through -+ case Op_HasNegatives: -+ return UseRVV; -+ case Op_EncodeISOArray: -+ return UseRVV && SpecialEncodeISOArray; + case Op_PopCountI: + case Op_PopCountL: + return UsePopCountInstruction; ++ + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosI: @@ -29355,17 +29135,12 @@ index 000000000..137e9b7c7 + +// Identify extra cases that we might want to provide match rules for vector nodes and +// other intrinsics guarded with vector length (vlen). 
-+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } -+ -+ return op_vec_supported(opcode); ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ return false; +} + +const bool Matcher::has_predicated_vectors(void) { -+ return false; // not supported -+ ++ return false; +} + +const int Matcher::float_pressure(int default_pressure_threshold) { @@ -29414,11 +29189,6 @@ index 000000000..137e9b7c7 + +// Vector width in bytes. +const int Matcher::vector_width_in_bytes(BasicType bt) { -+ if (UseRVV) { -+ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. -+ // MaxVectorSize == VM_Version::_initial_vector_length -+ return MaxVectorSize; -+ } + return 0; +} + @@ -29432,34 +29202,13 @@ index 000000000..137e9b7c7 + +// Vector ideal reg. +const uint Matcher::vector_ideal_reg(int len) { -+ assert(MaxVectorSize >= len, ""); -+ if (UseRVV) { -+ return Op_VecA; -+ } -+ + ShouldNotReachHere(); + return 0; +} + +const uint Matcher::vector_shift_count_ideal_reg(int size) { -+ switch(size) { -+ case 8: return Op_VecD; -+ case 16: return Op_VecX; -+ default: -+ if (size == vector_width_in_bytes(T_BYTE)) { -+ return Op_VecA; -+ } -+ } -+ ShouldNotReachHere(); -+ return 0; -+} -+ -+const bool Matcher::supports_scalable_vector() { -+ return UseRVV; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return Matcher::max_vector_size(bt); ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; +} + +// AES support not yet implemented @@ -29467,7 +29216,7 @@ index 000000000..137e9b7c7 + return false; +} + -+// riscv supports misaligned vectors store/load. ++// RISC-V supports misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return true; +} @@ -29638,42 +29387,7 @@ index 000000000..137e9b7c7 +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { -+ assert_cond(m != NULL); -+ if (clone_base_plus_offset_address(m, mstack, address_visited)) { -+ return true; -+ } -+ -+ Node *off = m->in(AddPNode::Offset); -+ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && -+ size_fits_all_mem_uses(m, off->in(2)->get_int()) && -+ // Are there other uses besides address expressions? -+ !is_visited(off)) { -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(2), Visit); -+ Node *conv = off->in(1); -+ if (conv->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? -+ !is_visited(conv)) { -+ address_visited.set(conv->_idx); // Flag as address_visited -+ mstack.push(conv->in(1), Pre_Visit); -+ } else { -+ mstack.push(conv, Pre_Visit); -+ } -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } else if (off != NULL && off->Opcode() == Op_ConvI2L && -+ // Are there other uses besides address expressions? 
-+ !is_visited(off)) { -+ address_visited.test_set(m->_idx); // Flag as address_visited -+ address_visited.set(off->_idx); // Flag as address_visited -+ mstack.push(off->in(1), Pre_Visit); -+ mstack.push(m->in(AddPNode::Address), Pre_Visit); -+ mstack.push(m->in(AddPNode::Base), Pre_Visit); -+ return true; -+ } -+ return false; ++ return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { @@ -29714,9 +29428,10 @@ index 000000000..137e9b7c7 + + enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + int64_t con = (int64_t)$src$$constant; + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, con); ++ __ li(dst_reg, con); + %} + + enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ @@ -29733,15 +29448,16 @@ index 000000000..137e9b7c7 + __ mov_metadata(dst_reg, (Metadata*)con); + } else { + assert(rtype == relocInfo::none, "unexpected reloc type"); -+ __ mv(dst_reg, $src$$constant); ++ __ li(dst_reg, $src$$constant); + } + } + %} + + enc_class riscv_enc_mov_p1(iRegP dst) %{ + MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); + Register dst_reg = as_Register($dst$$reg); -+ __ mv(dst_reg, 1); ++ __ li(dst_reg, 1); + %} + + enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ @@ -29792,42 +29508,42 @@ index 000000000..137e9b7c7 + } + %} + -+ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + %} + -+ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegI oldval, iRegI newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ 
true); + %} + -+ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegL oldval, iRegL newval) %{ + MacroAssembler _masm(&cbuf); + __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, @@ -29845,7 +29561,7 @@ index 000000000..137e9b7c7 + enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* L = $lbl$$label; -+ switch($cmp$$cmpcode) { ++ switch ($cmp$$cmpcode) { + case(BoolTest::ge): + __ j(*L); + break; @@ -29879,7 +29595,7 @@ index 000000000..137e9b7c7 + + __ bind(miss); + if (!$primary) { -+ __ mv(cr_reg, 1); ++ __ li(cr_reg, 1); + } + + __ bind(done); @@ -29893,7 +29609,7 @@ index 000000000..137e9b7c7 + assert_cond(addr != NULL); + if (!_method) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. -+ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; @@ -29902,19 +29618,19 @@ index 000000000..137e9b7c7 + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); -+ call = __ trampoline_call(Address(addr, rspec)); ++ call = __ trampoline_call(Address(addr, rspec), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } ++ + // Emit stub for static call -+ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } -+ + %} + + enc_class riscv_enc_java_dynamic_call(method meth) %{ @@ -29964,19 +29680,19 @@ index 000000000..137e9b7c7 + %} + + // using the cr register as the bool result: 0 for success; others failed. -+ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ + MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + + assert_different_registers(oop, box, tmp, disp_hdr, t0); + -+ // Load markOop from object into displaced_header. ++ // Load markWord from object into displaced_header. + __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); + + // Always do locking in runtime. @@ -29986,7 +29702,6 @@ index 000000000..137e9b7c7 + } + + if (UseBiasedLocking && !UseOptoBiasInlining) { -+ // ignore slow case here + __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); + } + @@ -29996,15 +29711,15 @@ index 000000000..137e9b7c7 + __ bnez(t0, object_has_monitor); + } + -+ // Set tmp to be (markOop of object | UNLOCK_VALUE). ++ // Set tmp to be (markWord of object | UNLOCK_VALUE). + __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); + + // Initialize the box. (Must happen before we update the object mark!) 
+ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ // Compare object markOop with an unlocked value (tmp) and if -+ // equal exchange the stack address of our box with object markOop. -+ // On failure disp_hdr contains the possibly locked markOop. ++ // Compare object markWord with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markWord. ++ // On failure disp_hdr contains the possibly locked markWord. + __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, + Assembler::rl, /*result*/disp_hdr); + __ mv(flag, zr); @@ -30017,9 +29732,9 @@ index 000000000..137e9b7c7 + // We did not see an unlocked object so try the fast recursive case. + + // Check if the owner is self by comparing the value in the -+ // markOop of object (disp_hdr) with the stack pointer. ++ // markWord of object (disp_hdr) with the stack pointer. + __ sub(disp_hdr, disp_hdr, sp); -+ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ __ li(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); + // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, + // hence we can store 0 as the displaced header in the box, which indicates that it is a + // recursive lock. @@ -30038,7 +29753,7 @@ index 000000000..137e9b7c7 + // Try to CAS m->owner from NULL to current thread. + __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); + __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, -+ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) + + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for @@ -30052,5133 +29767,4015 @@ index 000000000..137e9b7c7 + %} + + // using cr flag to indicate the fast_unlock result: 0 for success; others failed. -+ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ + MacroAssembler _masm(&cbuf); + Register flag = t1; + Register oop = as_Register($object$$reg); + Register box = as_Register($box$$reg); -+ Register disp_hdr = as_Register($tmp$$reg); ++ Register disp_hdr = as_Register($tmp1$$reg); + Register tmp = as_Register($tmp2$$reg); + Label cont; + Label object_has_monitor; + -+ assert_different_registers(oop, box, tmp, disp_hdr, flag); -+ -+ // Always do locking in runtime. -+ if (EmitSync & 0x01) { -+ __ mv(flag, 1); -+ return; -+ } -+ -+ if (UseBiasedLocking && !UseOptoBiasInlining) { -+ __ biased_locking_exit(oop, tmp, cont, flag); -+ } -+ -+ // Find the lock address and load the displaced header from the stack. -+ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); -+ -+ // If the displaced header is 0, we have a recursive unlock. -+ __ mv(flag, disp_hdr); -+ __ beqz(disp_hdr, cont); -+ -+ // Handle existing monitor. 
-+ if ((EmitSync & 0x02) == 0) { -+ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); -+ __ andi(t0, disp_hdr, markOopDesc::monitor_value); -+ __ bnez(t0, object_has_monitor); -+ } -+ -+ // Check if it is still a light weight lock, this is true if we -+ // see the stack address of the basicLock in the markOop of the -+ // object. -+ -+ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, -+ Assembler::rl, /*result*/tmp); -+ __ xorr(flag, box, tmp); // box == tmp if cas succeeds -+ __ j(cont); -+ -+ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); -+ -+ // Handle existing monitor. -+ if ((EmitSync & 0x02) == 0) { -+ __ bind(object_has_monitor); -+ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor -+ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); -+ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. -+ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions -+ __ bnez(flag, cont); -+ -+ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); -+ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); -+ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. -+ __ bnez(flag, cont); -+ // need a release store here -+ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sd(zr, Address(tmp)); // set unowned -+ } -+ -+ __ bind(cont); -+ %} -+ -+ // arithmetic encodings -+ -+ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); -+ %} -+ -+ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); -+ %} -+ -+ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); -+ %} -+ -+ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ -+ MacroAssembler _masm(&cbuf); -+ Register dst_reg = as_Register($dst$$reg); -+ Register src1_reg = as_Register($src1$$reg); -+ Register src2_reg = as_Register($src2$$reg); -+ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); -+ %} -+ -+ enc_class riscv_enc_tail_call(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ __ jr(target_reg); -+ %} -+ -+ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ -+ MacroAssembler _masm(&cbuf); -+ Register target_reg = as_Register($jump_target$$reg); -+ // exception oop should be in x10 -+ // ret addr has been popped into ra -+ // callee expects it in x13 -+ __ mv(x13, ra); -+ __ jr(target_reg); -+ %} -+ -+ enc_class riscv_enc_rethrow() %{ -+ MacroAssembler _masm(&cbuf); -+ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); -+ %} -+ -+ enc_class riscv_enc_ret() %{ -+ MacroAssembler _masm(&cbuf); -+ __ ret(); 
-+ %} -+ -+%} -+ -+//----------FRAME-------------------------------------------------------------- -+// Definition of frame structure and management information. -+// -+// S T A C K L A Y O U T Allocators stack-slot number -+// | (to get allocators register number -+// G Owned by | | v add OptoReg::stack0()) -+// r CALLER | | -+// o | +--------+ pad to even-align allocators stack-slot -+// w V | pad0 | numbers; owned by CALLER -+// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -+// h ^ | in | 5 -+// | | args | 4 Holes in incoming args owned by SELF -+// | | | | 3 -+// | | +--------+ -+// V | | old out| Empty on Intel, window on Sparc -+// | old |preserve| Must be even aligned. -+// | SP-+--------+----> Matcher::_old_SP, even aligned -+// | | in | 3 area for Intel ret address -+// Owned by |preserve| Empty on Sparc. -+// SELF +--------+ -+// | | pad2 | 2 pad to align old SP -+// | +--------+ 1 -+// | | locks | 0 -+// | +--------+----> OptoReg::stack0(), even aligned -+// | | pad1 | 11 pad to align new SP -+// | +--------+ -+// | | | 10 -+// | | spills | 9 spills -+// V | | 8 (pad0 slot for callee) -+// -----------+--------+----> Matcher::_out_arg_limit, unaligned -+// ^ | out | 7 -+// | | args | 6 Holes in outgoing args owned by CALLEE -+// Owned by +--------+ -+// CALLEE | new out| 6 Empty on Intel, window on Sparc -+// | new |preserve| Must be even-aligned. -+// | SP-+--------+----> Matcher::_new_SP, even aligned -+// | | | -+// -+// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -+// known from SELF's arguments and the Java calling convention. -+// Region 6-7 is determined per call site. -+// Note 2: If the calling convention leaves holes in the incoming argument -+// area, those holes are owned by SELF. Holes in the outgoing area -+// are owned by the CALLEE. Holes should not be nessecary in the -+// incoming area, as the Java calling convention is completely under -+// the control of the AD file. Doubles can be sorted and packed to -+// avoid holes. Holes in the outgoing arguments may be nessecary for -+// varargs C calling conventions. -+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -+// even aligned with pad0 as needed. -+// Region 6 is even aligned. Region 6-7 is NOT even aligned; -+// (the latter is true on Intel but is it false on RISCV?) -+// region 6-11 is even aligned; it may be padded out more so that -+// the region from SP to FP meets the minimum stack alignment. -+// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack -+// alignment. Region 11, pad1, may be dynamically extended so that -+// SP meets the minimum alignment. -+ -+frame %{ -+ // What direction does stack grow in (assumed to be same for C & Java) -+ stack_direction(TOWARDS_LOW); -+ -+ // These three registers define part of the calling convention -+ // between compiled code and the interpreter. -+ -+ // Inline Cache Register or methodOop for I2C. -+ inline_cache_reg(R31); -+ -+ // Method Oop Register when calling interpreter. 
-+ interpreter_method_oop_reg(R31); -+ -+ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] -+ cisc_spilling_operand_name(indOffset); -+ -+ // Number of stack slots consumed by locking an object -+ // generate Compile::sync_stack_slots -+ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 -+ sync_stack_slots(1 * VMRegImpl::slots_per_word); -+ -+ // Compiled code's Frame Pointer -+ frame_pointer(R2); -+ -+ // Interpreter stores its frame pointer in a register which is -+ // stored to the stack by I2CAdaptors. -+ // I2CAdaptors convert from interpreted java to compiled java. -+ interpreter_frame_pointer(R8); -+ -+ // Stack alignment requirement -+ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) -+ -+ // Number of stack slots between incoming argument block and the start of -+ // a new frame. The PROLOG must add this many slots to the stack. The -+ // EPILOG must remove this many slots. -+ // RISCV needs two words for RA (return address) and FP (frame pointer). -+ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); -+ -+ // Number of outgoing stack slots killed above the out_preserve_stack_slots -+ // for calls to C. Supports the var-args backing area for register parms. -+ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); -+ -+ // The after-PROLOG location of the return address. Location of -+ // return address specifies a type (REG or STACK) and a number -+ // representing the register number (i.e. - use a register name) or -+ // stack slot. -+ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. -+ // Otherwise, it is above the locks and verification slot and alignment word -+ // TODO this may well be correct but need to check why that - 2 is there -+ // ppc port uses 0 but we definitely need to allow for fixed_slots -+ // which folds in the space used for monitors -+ return_addr(STACK - 2 + -+ align_up((Compile::current()->in_preserve_stack_slots() + -+ Compile::current()->fixed_slots()), -+ stack_alignment_in_slots())); -+ -+ // Body of function which returns an integer array locating -+ // arguments either in registers or in stack slots. Passed an array -+ // of ideal registers called "sig" and a "length" count. Stack-slot -+ // offsets are based on outgoing arguments, i.e. a CALLER setting up -+ // arguments for a CALLEE. Incoming stack arguments are -+ // automatically biased by the preserve_stack_slots field above. -+ -+ calling_convention -+ %{ -+ // No difference between ingoing/outgoing just pass false -+ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); -+ %} -+ -+ c_calling_convention -+ %{ -+ // This is obviously always outgoing -+ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); -+ %} -+ -+ // Location of compiled Java return values. Same as C for now. 
-+ return_value -+ %{ -+ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, -+ "only return normal values"); -+ -+ static const int lo[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ R10_num, // Op_RegN -+ R10_num, // Op_RegI -+ R10_num, // Op_RegP -+ F10_num, // Op_RegF -+ F10_num, // Op_RegD -+ R10_num // Op_RegL -+ }; -+ -+ static const int hi[Op_RegL + 1] = { // enum name -+ 0, // Op_Node -+ 0, // Op_Set -+ OptoReg::Bad, // Op_RegN -+ OptoReg::Bad, // Op_RegI -+ R10_H_num, // Op_RegP -+ OptoReg::Bad, // Op_RegF -+ F10_H_num, // Op_RegD -+ R10_H_num // Op_RegL -+ }; -+ -+ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); -+ %} -+%} -+ -+//----------ATTRIBUTES--------------------------------------------------------- -+//----------Operand Attributes------------------------------------------------- -+op_attrib op_cost(1); // Required cost attribute -+ -+//----------Instruction Attributes--------------------------------------------- -+ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute -+ins_attrib ins_size(32); // Required size attribute (in bits) -+ins_attrib ins_short_branch(0); // Required flag: is this instruction -+ // a non-matching short branch variant -+ // of some long branch? -+ins_attrib ins_alignment(4); // Required alignment attribute (must -+ // be a power of 2) specifies the -+ // alignment that some part of the -+ // instruction (not necessarily the -+ // start) requires. If > 1, a -+ // compute_padding() function must be -+ // provided for the instruction -+ -+//----------OPERANDS----------------------------------------------------------- -+// Operand definitions must precede instruction definitions for correct parsing -+// in the ADLC because operands constitute user defined types which are used in -+// instruction definitions. 
-+ -+//----------Simple Operands---------------------------------------------------- -+ -+// Integer operands 32 bit -+// 32 bit immediate -+operand immI() -+%{ -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit zero -+operand immI0() -+%{ -+ predicate(n->get_int() == 0); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit unit increment -+operand immI_1() -+%{ -+ predicate(n->get_int() == 1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit unit decrement -+operand immI_M1() -+%{ -+ predicate(n->get_int() == -1); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Unsigned Integer Immediate: 6-bit int, greater than 32 -+operand uimmI6_ge32() %{ -+ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_le_4() -+%{ -+ predicate(n->get_int() <= 4); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_16() -+%{ -+ predicate(n->get_int() == 16); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_24() -+%{ -+ predicate(n->get_int() == 24); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_31() -+%{ -+ predicate(n->get_int() == 31); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_32() -+%{ -+ predicate(n->get_int() == 32); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_63() -+%{ -+ predicate(n->get_int() == 63); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immI_64() -+%{ -+ predicate(n->get_int() == 64); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit integer valid for add immediate -+operand immIAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 32 bit integer valid for sub immediate -+operand immISub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 5 bit signed value. 
-+operand immI5() -+%{ -+ predicate(n->get_int() <= 15 && n->get_int() >= -16); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 5 bit signed value (simm5) -+operand immL5() -+%{ -+ predicate(n->get_long() <= 15 && n->get_long() >= -16); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Integer operands 64 bit -+// 64 bit immediate -+operand immL() -+%{ -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit zero -+operand immL0() -+%{ -+ predicate(n->get_long() == 0); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Pointer operands -+// Pointer Immediate -+operand immP() -+%{ -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// NULL Pointer Immediate -+operand immP0() -+%{ -+ predicate(n->get_ptr() == 0); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Pointer Immediate One -+// this is used in object initialization (initial object header) -+operand immP_1() -+%{ -+ predicate(n->get_ptr() == 1); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Polling Page Pointer Immediate -+operand immPollPage() -+%{ -+ predicate((address)n->get_ptr() == os::get_polling_page()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Card Table Byte Map Base -+operand immByteMapBase() -+%{ -+ // Get base of card map -+ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && -+ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); -+ match(ConP); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Int Immediate: low 16-bit mask -+operand immI_16bits() -+%{ -+ predicate(n->get_int() == 0xFFFF); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Long Immediate: low 32-bit mask -+operand immL_32bits() -+%{ -+ predicate(n->get_long() == 0xFFFFFFFFL); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit unit decrement -+operand immL_M1() -+%{ -+ predicate(n->get_long() == -1); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+ -+// 32 bit offset of pc in thread anchor -+ -+operand immL_pc_off() -+%{ -+ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + -+ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); -+ match(ConL); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit integer valid for add immediate -+operand immLAdd() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// 64 bit integer valid for sub immediate -+operand immLSub() -+%{ -+ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Narrow pointer operands -+// Narrow Pointer Immediate -+operand immN() -+%{ -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Narrow NULL Pointer Immediate -+operand immN0() -+%{ -+ predicate(n->get_narrowcon() == 0); -+ match(ConN); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immNKlass() -+%{ -+ match(ConNKlass); -+ -+ op_cost(0); -+ format %{ %} -+ 
interface(CONST_INTER); -+%} -+ -+// Float and Double operands -+// Double Immediate -+operand immD() -+%{ -+ match(ConD); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Double Immediate: +0.0d -+operand immD0() -+%{ -+ predicate(jlong_cast(n->getd()) == 0); -+ match(ConD); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Float Immediate -+operand immF() -+%{ -+ match(ConF); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Float Immediate: +0.0f. -+operand immF0() -+%{ -+ predicate(jint_cast(n->getf()) == 0); -+ match(ConF); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immIOffset() -+%{ -+ predicate(is_imm_in_range(n->get_int(), 12, 0)); -+ match(ConI); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+operand immLOffset() -+%{ -+ predicate(is_imm_in_range(n->get_long(), 12, 0)); -+ match(ConL); -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Scale values -+operand immIScale() -+%{ -+ predicate(1 <= n->get_int() && (n->get_int() <= 3)); -+ match(ConI); -+ -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} -+ -+// Integer 32 bit Register Operands -+operand iRegI() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 32 bit Register not Special -+operand iRegINoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R10 only -+operand iRegI_R10() -+%{ -+ constraint(ALLOC_IN_RC(int_r10_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R12 only -+operand iRegI_R12() -+%{ -+ constraint(ALLOC_IN_RC(int_r12_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R13 only -+operand iRegI_R13() -+%{ -+ constraint(ALLOC_IN_RC(int_r13_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Register R14 only -+operand iRegI_R14() -+%{ -+ constraint(ALLOC_IN_RC(int_r14_reg)); -+ match(RegI); -+ match(iRegINoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register Operands -+operand iRegL() -+%{ -+ constraint(ALLOC_IN_RC(any_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register not Special -+operand iRegLNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg)); -+ match(RegL); -+ match(iRegL_R10); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R28 only -+operand iRegL_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R29 only -+operand iRegL_R29() -+%{ -+ constraint(ALLOC_IN_RC(r29_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R30 only -+operand iRegL_R30() -+%{ -+ constraint(ALLOC_IN_RC(r30_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer Register Operands -+// Pointer Register -+operand iRegP() -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ match(iRegP_R10); -+ match(javaThread_RegP); -+ op_cost(0); 
-+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register not Special -+operand iRegPNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_ptr_reg)); -+ match(RegP); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R11 only -+operand iRegP_R11() -+%{ -+ constraint(ALLOC_IN_RC(r11_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R12() -+%{ -+ constraint(ALLOC_IN_RC(r12_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R13 only -+operand iRegP_R13() -+%{ -+ constraint(ALLOC_IN_RC(r13_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R14() -+%{ -+ constraint(ALLOC_IN_RC(r14_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R15() -+%{ -+ constraint(ALLOC_IN_RC(r15_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand iRegP_R16() -+%{ -+ constraint(ALLOC_IN_RC(r16_reg)); -+ match(RegP); -+ // match(iRegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer 64 bit Register R28 only -+operand iRegP_R28() -+%{ -+ constraint(ALLOC_IN_RC(r28_reg)); -+ match(RegP); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Pointer Register Operands -+// Narrow Pointer Register -+operand iRegN() -+%{ -+ constraint(ALLOC_IN_RC(any_reg32)); -+ match(RegN); -+ match(iRegNNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Integer 64 bit Register not Special -+operand iRegNNoSp() -+%{ -+ constraint(ALLOC_IN_RC(no_special_reg32)); -+ match(RegN); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// heap base register -- used for encoding immN0 -+operand iRegIHeapbase() -+%{ -+ constraint(ALLOC_IN_RC(heapbase_reg)); -+ match(RegI); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Long 64 bit Register R10 only -+operand iRegL_R10() -+%{ -+ constraint(ALLOC_IN_RC(r10_reg)); -+ match(RegL); -+ match(iRegLNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Float Register -+// Float register operands -+operand fRegF() -+%{ -+ constraint(ALLOC_IN_RC(float_reg)); -+ match(RegF); -+ -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Double Register -+// Double register operands -+operand fRegD() -+%{ -+ constraint(ALLOC_IN_RC(double_reg)); -+ match(RegD); -+ -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Generic vector class. This will be used for -+// all vector operands. 
-+operand vReg() -+%{ -+ constraint(ALLOC_IN_RC(vectora_reg)); -+ match(VecA); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V1() -+%{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V2() -+%{ -+ constraint(ALLOC_IN_RC(v2_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V3() -+%{ -+ constraint(ALLOC_IN_RC(v3_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vReg_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(VecA); -+ match(vReg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+// Java Thread Register -+operand javaThread_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg -+ match(reg); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+//----------Memory Operands---------------------------------------------------- -+// RISCV has only base_plus_offset and literal address mode, so no need to use -+// index and scale. Here set index as 0xffffffff and scale as 0x0. -+operand indirect(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(reg); -+ op_cost(0); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} -+ -+operand indOffI(iRegP reg, immIOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indOffL(iRegP reg, immLOffset off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indirectN(iRegN reg) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(DecodeN reg); -+ op_cost(0); -+ format %{ "[$reg]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp(0x0); -+ %} -+%} -+ -+operand indOffIN(iRegN reg, immIOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+operand indOffLN(iRegN reg, immLOffset off) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP (DecodeN reg) off); -+ op_cost(0); -+ format %{ "[$reg, $off]\t# narrow" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+// RISCV opto stubs need to write to the pc slot in the thread anchor -+operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} -+ -+ -+//----------Special Memory 
Operands-------------------------------------------- -+// Stack Slot Operand - This operand is used for loading and storing temporary -+// values on the stack where a match requires a value to -+// flow through memory. -+operand stackSlotI(sRegI reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegI); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotF(sRegF reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegF); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotD(sRegD reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegD); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+operand stackSlotL(sRegL reg) -+%{ -+ constraint(ALLOC_IN_RC(stack_slots)); -+ // No match rule because this operand is only generated in matching -+ // match(RegL); -+ format %{ "[$reg]" %} -+ interface(MEMORY_INTER) %{ -+ base(0x02); // RSP -+ index(0xffffffff); // No Index -+ scale(0x0); // No Scale -+ disp($reg); // Stack Offset -+ %} -+%} -+ -+// Special operand allowing long args to int ops to be truncated for free -+ -+operand iRegL2I(iRegL reg) %{ ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); + -+ op_cost(0); ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } + -+ match(ConvL2I reg); ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } + -+ format %{ "l2i($reg)" %} ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); + -+ interface(REG_INTER) -+%} ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); + ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, tmp, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } + -+// Comparison Operands -+// NOTE: Label is a predefined operand which should not be redefined in -+// the AD file. It is generically handled within the ADLC. ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markWord of the ++ // object. + -+//----------Conditional Branch Operands---------------------------------------- -+// Comparison Op - This is the operation of the comparison, and is limited to -+// the following set of codes: -+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -+// -+// Other attributes of the comparison, such as unsignedness, are specified -+// by the comparison instruction that sets a condition code flags register. -+// That result is represented by a flags operand whose subtype is appropriate -+// to the unsignedness (etc.) of the comparison. 
-+// -+// Later, the instruction which matches both the Comparison Op (a Bool) and -+// the flags (produced by the Cmp) specifies the coding of the comparison op -+// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ __ j(cont); + ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + -+// used for signed integral comparisons and fp comparisons -+operand cmpOp() -+%{ -+ match(Bool); ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ STATIC_ASSERT(markOopDesc::monitor_value <= INT_MAX); ++ __ add(tmp, tmp, -(int)markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); + -+ format %{ "" %} ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. ++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } + -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ __ bind(cont); + %} -+%} + -+// used for unsigned integral comparisons -+operand cmpOpU() -+%{ -+ match(Bool); ++ // arithmetic encodings + -+ format %{ "" %} -+ // the values in interface derives from struct BoolTest::mask -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gtu"); -+ overflow(0x2, "overflow"); -+ less(0x3, "ltu"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "leu"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "geu"); ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); + %} -+%} + -+// used for certain integral comparisons which can be -+// converted to bxx instructions -+operand cmpOpEqNe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq); ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ 
enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); + %} -+%} + -+operand cmpOpULtGe() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::lt || -+ n->as_Bool()->_test._test == BoolTest::ge); ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); + %} -+%} + -+operand cmpOpUEqNeLeGt() -+%{ -+ match(Bool); -+ op_cost(0); -+ predicate(n->as_Bool()->_test._test == BoolTest::ne || -+ n->as_Bool()->_test._test == BoolTest::eq || -+ n->as_Bool()->_test._test == BoolTest::le || -+ n->as_Bool()->_test._test == BoolTest::gt); ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} + -+ format %{ "" %} -+ interface(COND_INTER) %{ -+ equal(0x0, "eq"); -+ greater(0x1, "gt"); -+ overflow(0x2, "overflow"); -+ less(0x3, "lt"); -+ not_equal(0x4, "ne"); -+ less_equal(0x5, "le"); -+ no_overflow(0x6, "no_overflow"); -+ greater_equal(0x7, "ge"); ++ enc_class riscv_enc_rethrow() %{ ++ MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} ++ ++ enc_class riscv_enc_ret() %{ ++ MacroAssembler _masm(&cbuf); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ret(); + %} ++ +%} + ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. 
++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// (the latter is true on Intel but is it false on RISCV?) ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); + -+// Flags register, used as output of compare logic -+operand rFlagsReg() -+%{ -+ constraint(ALLOC_IN_RC(reg_flags)); -+ match(RegFlags); ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. + -+ op_cost(0); -+ format %{ "RFLAGS" %} -+ interface(REG_INTER); -+%} ++ // Inline Cache Register or methodOop for I2C. ++ inline_cache_reg(R31); + -+// Special Registers ++ // Method Oop Register when calling interpreter. ++ interpreter_method_oop_reg(R31); + -+// Method Register -+operand inline_cache_RegP(iRegP reg) -+%{ -+ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg -+ match(reg); -+ match(iRegPNoSp); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset); + -+//----------OPERAND CLASSES---------------------------------------------------- -+// Operand Classes are groups of operands that are used as to simplify -+// instruction definitions by not requiring the AD writer to specify -+// separate instructions for every form of operand when the -+// instruction accepts multiple operand types with the same basic -+// encoding and format. The classic case of this is memory operands. 
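The frame entries just below derive their slot counts from VMRegImpl::slots_per_word. Restated as a compilable sketch under the LP64 assumptions named in the nearby comments (8-byte machine words, 4-byte C2 stack slots):

    #include <cassert>

    constexpr int wordSize        = 8;                          // bytes per machine word (assumed LP64)
    constexpr int stack_slot_size = 4;                          // bytes per C2 stack slot
    constexpr int slots_per_word  = wordSize / stack_slot_size; // == 2

    int main() {
      // sync_stack_slots(1 * VMRegImpl::slots_per_word): one word per lock.
      assert(1 * slots_per_word == 2);                     // 2 slots == 8 bytes per monitor
      // in_preserve_stack_slots(2 * VMRegImpl::slots_per_word): two words,
      // the saved return address and the saved frame pointer.
      assert(2 * slots_per_word == 4);                     // 4 slots
      assert(2 * slots_per_word * stack_slot_size == 16);  // 16 bytes preserved
      return 0;
    }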
++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 ++ sync_stack_slots(1 * VMRegImpl::slots_per_word); + -+// memory is used to define read/write location for load/store -+// instruction defs. we can turn a memory op into an Address ++ // Compiled code's Frame Pointer ++ frame_pointer(R2); + -+opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(R8); + -+// iRegIorL2I is used for src inputs in rules for 32 bit int (I) -+// operations. it allows the src to be either an iRegI or a (ConvL2I -+// iRegL). in the latter case the l2i normally planted for a ConvL2I -+// can be elided because the 32-bit instruction will just employ the -+// lower 32 bits anyway. -+// -+// n.b. this does not elide all L2I conversions. if the truncated -+// value is consumed by more than one operation then the ConvL2I -+// cannot be bundled into the consuming nodes so an l2i gets planted -+// (actually a mvw $dst $src) and the downstream instructions consume -+// the result of the l2i as an iRegI input. That's a shame since the -+// mvw is actually redundant but its not too costly. ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) + -+opclass iRegIorL2I(iRegI, iRegL2I); -+opclass iRegIorL(iRegI, iRegL); -+opclass iRegNorP(iRegN, iRegP); -+opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); -+opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); -+opclass immIorL(immI, immL); ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. RISC-V needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + -+//----------PIPELINE----------------------------------------------------------- -+// Rules which define the behavior of the target architectures pipeline. ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); + -+// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline -+//pipe_desc(ID, EX, MEM, WR); -+#define ID S0 -+#define EX S1 -+#define MEM S2 -+#define WR S3 ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 
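The return_addr() expression a few lines below rounds the preserved-plus-fixed slot count up to the stack alignment. For reference, this is the usual power-of-two align_up; a small sketch (the helper name and the example values are illustrative):

    #include <cassert>
    #include <cstdint>

    // Valid only when 'alignment' is a power of two, which stack alignments are.
    constexpr uint64_t align_up(uint64_t value, uint64_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      // e.g. 16-byte stack alignment with 4-byte slots -> alignment of 4 slots
      assert(align_up(5, 4) == 8);   // 5 preserved+fixed slots round up to 8
      assert(align_up(4, 4) == 4);   // already aligned counts are unchanged
      return 0;
    }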
++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); + -+// Integer ALU reg operation -+pipeline %{ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. + -+attributes %{ -+ // RISC-V instructions are of fixed length -+ fixed_size_instructions; // Fixed size instructions TODO does -+ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 -+ // RISC-V instructions come in 32-bit word units -+ instruction_unit_size = 4; // An instruction is 4 bytes long -+ instruction_fetch_unit_size = 64; // The processor fetches one line -+ instruction_fetch_units = 1; // of 64 bytes ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} + -+ // List of nop instructions -+ nops( MachNop ); -+%} ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} + -+// We don't use an actual pipeline model so don't care about resources -+// or description. we do use pipeline classes to introduce fixed -+// latencies ++ // Location of compiled Java return values. Same as C for now. 
++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); + -+//----------RESOURCES---------------------------------------------------------- -+// Resources are the functional units available to the machine ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; + -+// Generic RISC-V pipeline -+// 1 decoder -+// 1 instruction decoded per cycle -+// 1 load/store ops per cycle, 1 branch, 1 FPU -+// 1 mul, 1 div ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; + -+resources ( DECODE, -+ ALU, -+ MUL, -+ DIV, -+ BRANCH, -+ LDST, -+ FPU); ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++%} + -+//----------PIPELINE DESCRIPTION----------------------------------------------- -+// Pipeline Description specifies the stages in the machine's pipeline ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute + -+// Define the pipeline as a generic 6 stage pipeline -+pipe_desc(S0, S1, S2, S3, S4, S5); ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction + -+//----------PIPELINE CLASSES--------------------------------------------------- -+// Pipeline Classes describe the stages in which input and output are -+// referenced by the hardware pipeline. ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
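For reference, the return_value body in the frame block above amounts to a table lookup from ideal register kind to a (lo, hi) register pair: integer and pointer results land in R10 (the RISC-V a0 register), floating-point results in F10 (fa0), with the hi half unused for 32-bit kinds. A hedged restatement in plain C++, where the enum and strings stand in for the real OptoReg encodings:

    #include <cassert>
    #include <string>
    #include <utility>

    enum IdealKind { RegN, RegI, RegP, RegF, RegD, RegL };  // stand-ins for Op_Reg*

    std::pair<std::string, std::string> return_regs(IdealKind kind) {
      switch (kind) {
        case RegN: case RegI: return {"R10", "Bad"};    // 32-bit integer/narrow: low half only
        case RegP: case RegL: return {"R10", "R10_H"};  // 64-bit integer/pointer
        case RegF:            return {"F10", "Bad"};    // single-precision float
        case RegD:            return {"F10", "F10_H"};  // double-precision float
      }
      return {"Bad", "Bad"};
    }

    int main() {
      assert(return_regs(RegI).first  == "R10");
      assert(return_regs(RegD).second == "F10_H");
      return 0;
    }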
+ -+pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++//----------Simple Operands---------------------------------------------------- ++ ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) ++// 32 bit zero ++operand immI0() +%{ -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_uop_s(fRegF dst, fRegF src) ++// 32 bit unit increment ++operand immI_1() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit unit decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_uop_d(fRegD dst, fRegD src) -+%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2f(fRegF dst, fRegD src) ++operand immI_le_4() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() <= 4); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2d(fRegD dst, fRegF src) ++operand immI_16() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2i(iRegINoSp dst, fRegF src) ++operand immI_24() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_f2l(iRegLNoSp dst, fRegF src) ++operand immI_31() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_i2f(fRegF dst, iRegIorL2I src) ++operand immI_63() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() == 63); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_l2f(fRegF dst, iRegL src) ++// 32 bit integer valid for add immediate ++operand immIAdd() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(Assembler::operand_valid_for_add_immediate((int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2i(iRegINoSp dst, fRegD src) ++// 32 bit integer valid for sub immediate ++operand immISub() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ 
predicate(Assembler::operand_valid_for_add_immediate(-(int64_t)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_d2l(iRegLNoSp dst, fRegD src) ++// 5 bit signed value. ++operand immI5() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_i2d(fRegD dst, iRegIorL2I src) ++// 5 bit signed value (simm5) ++operand immL5() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_l2d(fRegD dst, iRegIorL2I src) ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() +%{ -+ single_instruction; -+ src : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) ++// 64 bit zero ++operand immL0() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_long() == 0); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) ++// Pointer operands ++// Pointer Immediate ++operand immP() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) ++// NULL Pointer Immediate ++operand immP0() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() +%{ -+ single_instruction; -+ src1 : S1(read); -+ src2 : S2(read); -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate(n->get_ptr() == 1); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_constant_s(fRegF dst) ++// Polling Page Pointer Immediate ++operand immPollPage() +%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_constant_d(fRegD dst) ++// Card Table Byte Map Base ++operand immByteMapBase() +%{ -+ single_instruction; -+ dst : S5(write); -+ DECODE : ID; -+ FPU : S5; ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_mem_s(fRegF dst, memory mem) ++// Int Immediate: low 16-bit mask ++operand immI_16bits() +%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_int() == 0xFFFF); ++ 
match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_load_mem_d(fRegD dst, memory mem) -+%{ -+ single_instruction; -+ mem : S1(read); -+ dst : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++operand immIpowerOf2() %{ ++ predicate(is_power_of_2((juint)(n->get_int()))); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_store_reg_s(fRegF src, memory mem) ++// Long Immediate: low 32-bit mask ++operand immL_32bits() +%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+pipe_class fp_store_reg_d(fRegD src, memory mem) ++// 64 bit unit decrement ++operand immL_M1() +%{ -+ single_instruction; -+ src : S1(read); -+ mem : S5(write); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(n->get_long() == -1); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+//------- Integer ALU operations -------------------------- + -+// Integer ALU reg-reg operation -+// Operands needs in ID, result generated in EX -+// E.g. ADD Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// 32 bit offset of pc in thread anchor ++ ++operand immL_pc_off() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg operation with constant shift -+// E.g. SLLI Rd, Rs1, #shift -+pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++// 64 bit integer valid for add immediate ++operand immLAdd() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg-reg operation with variable shift -+// both operands must be available in ID -+// E.g. SLL Rd, Rs1, Rs2 -+pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++// 64 bit integer valid for sub immediate ++operand immLSub() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg operation -+// E.g. NEG Rd, Rs2 -+pipe_class ialu_reg(iRegI dst, iRegI src) ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() +%{ -+ single_instruction; -+ dst : EX(write); -+ src : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU reg immediate operation -+// E.g. ADDI Rd, Rs1, #imm -+pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++// Narrow NULL Pointer Immediate ++operand immN0() +%{ -+ single_instruction; -+ dst : EX(write); -+ src1 : ID(read); -+ DECODE : ID; -+ ALU : EX; ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Integer ALU immediate operation (no source operands) -+// E.g. 
LI Rd, #imm -+pipe_class ialu_imm(iRegI dst) ++operand immNKlass() +%{ -+ single_instruction; -+ dst : EX(write); -+ DECODE : ID; -+ ALU : EX; -+%} ++ match(ConNKlass); + -+//------- Multiply pipeline operations -------------------- ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Multiply reg-reg -+// E.g. MULW Rd, Rs1, Rs2 -+pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float and Double operands ++// Double Immediate ++operand immD() +%{ -+ single_instruction; -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// E.g. MUL RD, Rs1, Rs2 -+pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Double Immediate: +0.0d ++operand immD0() +%{ -+ single_instruction; -+ fixed_latency(3); // Maximum latency for 64 bit mul -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ MUL : WR; -+%} ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); + -+//------- Divide pipeline operations -------------------- ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// E.g. DIVW Rd, Rs1, Rs2 -+pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float Immediate ++operand immF() +%{ -+ single_instruction; -+ fixed_latency(8); // Maximum latency for 32 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// E.g. DIV RD, Rs1, Rs2 -+pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++// Float Immediate: +0.0f. ++operand immF0() +%{ -+ single_instruction; -+ fixed_latency(16); // Maximum latency for 64 bit divide -+ dst : WR(write); -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ DIV : WR; -+%} ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); + -+//------- Load pipeline operations ------------------------ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} + -+// Load - reg, mem -+// E.g. LA Rd, mem -+pipe_class iload_reg_mem(iRegI dst, memory mem) ++operand immIOffset() +%{ -+ single_instruction; -+ dst : WR(write); -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Load - reg, reg -+// E.g. LD Rd, Rs -+pipe_class iload_reg_reg(iRegI dst, iRegI src) ++operand immLOffset() +%{ -+ single_instruction; -+ dst : WR(write); -+ src : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+//------- Store pipeline operations ----------------------- -+ -+// Store - zr, mem -+// E.g. SD zr, mem -+pipe_class istore_mem(memory mem) ++// Scale values ++operand immIScale() +%{ -+ single_instruction; -+ mem : ID(read); -+ DECODE : ID; -+ LDST : MEM; ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+// Store - reg, mem -+// E.g. SD Rs, mem -+pipe_class istore_reg_mem(iRegI src, memory mem) ++// Integer 32 bit Register Operands ++operand iRegI() +%{ -+ single_instruction; -+ mem : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Store - reg, reg -+// E.g. 
SD Rs2, Rs1 -+pipe_class istore_reg_reg(iRegI dst, iRegI src) ++// Integer 32 bit Register not Special ++operand iRegINoSp() +%{ -+ single_instruction; -+ dst : ID(read); -+ src : EX(read); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+//------- Store pipeline operations ----------------------- -+ -+// Branch -+pipe_class pipe_branch() ++// Register R10 only ++operand iRegI_R10() +%{ -+ single_instruction; -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Branch -+pipe_class pipe_branch_reg(iRegI src) ++// Register R12 only ++operand iRegI_R12() +%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Compare & Branch -+// E.g. BEQ Rs1, Rs2, L -+pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++// Register R13 only ++operand iRegI_R13() +%{ -+ single_instruction; -+ src1 : ID(read); -+ src2 : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// E.g. BEQZ Rs, L -+pipe_class pipe_cmpz_branch(iRegI src) ++// Register R14 only ++operand iRegI_R14() +%{ -+ single_instruction; -+ src : ID(read); -+ DECODE : ID; -+ BRANCH : EX; ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+//------- Synchronisation operations ---------------------- -+// Any operation requiring serialization -+// E.g. FENCE/Atomic Ops/Load Acquire/Store Release -+pipe_class pipe_serial() ++// Integer 64 bit Register Operands ++operand iRegL() +%{ -+ single_instruction; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+pipe_class pipe_slow() ++// Integer 64 bit Register not Special ++operand iRegLNoSp() +%{ -+ instruction_count(10); -+ multiple_bundles; -+ force_serialization; -+ fixed_latency(16); -+ DECODE : ID; -+ LDST : MEM; ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Empty pipeline class -+pipe_class pipe_class_empty() ++// Long 64 bit Register R28 only ++operand iRegL_R28() +%{ -+ single_instruction; -+ fixed_latency(0); ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Default pipeline class. -+pipe_class pipe_class_default() ++// Long 64 bit Register R29 only ++operand iRegL_R29() +%{ -+ single_instruction; -+ fixed_latency(2); ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for compares. -+pipe_class pipe_class_compare() ++// Long 64 bit Register R30 only ++operand iRegL_R30() +%{ -+ single_instruction; -+ fixed_latency(16); ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for memory operations. 
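The fixed-register operand classes defined around here (iRegI_R10, iRegI_R12 through iRegI_R14, iRegL_R28 through iRegL_R30, and the iRegP_Rnn variants nearby) pin values to particular machine registers; their Rnn numbers follow the standard RISC-V ABI naming, with x10-x17 being the argument/result registers a0-a7 and x28-x31 the temporaries t3-t6. The lookup below is purely illustrative and not part of the port:

    #include <cassert>
    #include <string>

    std::string abi_name(int xreg) {
      if (xreg == 0)                return "zero";
      if (xreg == 1)                return "ra";    // return address
      if (xreg == 2)                return "sp";    // stack pointer
      if (xreg == 3)                return "gp";
      if (xreg == 4)                return "tp";
      if (xreg >= 5  && xreg <= 7)  return "t" + std::to_string(xreg - 5);
      if (xreg == 8)                return "s0/fp"; // frame pointer
      if (xreg == 9)                return "s1";
      if (xreg >= 10 && xreg <= 17) return "a" + std::to_string(xreg - 10);
      if (xreg >= 18 && xreg <= 27) return "s" + std::to_string(xreg - 16);
      if (xreg >= 28 && xreg <= 31) return "t" + std::to_string(xreg - 25);
      return "?";
    }

    int main() {
      assert(abi_name(10) == "a0");  // iRegI_R10 / iRegL_R10: first argument / result register
      assert(abi_name(14) == "a4");  // iRegI_R14, iRegP_R14
      assert(abi_name(28) == "t3");  // iRegL_R28, iRegP_R28: scratch temporaries
      return 0;
    }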
-+pipe_class pipe_class_memory() ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() +%{ -+ single_instruction; -+ fixed_latency(16); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Pipeline class for call. -+pipe_class pipe_class_call() ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() +%{ -+ single_instruction; -+ fixed_latency(100); ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Define the class for the Nop node. -+define %{ -+ MachNop = pipe_class_empty; -+%} ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} -+//----------INSTRUCTIONS------------------------------------------------------- -+// -+// match -- States which machine-independent subtree may be replaced -+// by this instruction. -+// ins_cost -- The estimated cost of this instruction is used by instruction -+// selection to identify a minimum cost tree of machine -+// instructions that matches a tree of machine-independent -+// instructions. -+// format -- A string providing the disassembly for this instruction. -+// The value of an instruction's operand may be inserted -+// by referring to it with a '$' prefix. -+// opcode -- Three instruction opcodes may be provided. These are referred -+// to within an encode class as $primary, $secondary, and $tertiary -+// rrspectively. The primary opcode is commonly used to -+// indicate the type of machine instruction, while secondary -+// and tertiary are often used for prefix options or addressing -+// modes. -+// ins_encode -- A list of encode classes with parameters. The encode class -+// name must have been defined in an 'enc_class' specification -+// in the encode section of the architecture description. 
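The iRegIHeapbase operand above, together with immN0 and the narrow-oop memory operands in this file (all predicated on narrow_oop_shift() == 0), relate to compressed-oop decoding: an oop is reconstructed as the heap base plus the 32-bit narrow value shifted left by the encoding shift, and when both the shift and the base are zero the narrow value is usable directly as an address. A hedged sketch of that decode, with made-up base and shift values:

    #include <cassert>
    #include <cstdint>

    // Illustrative only; HotSpot's actual decode also special-cases nulls
    // depending on the compressed-oops mode in use.
    uint64_t decode_narrow_oop(uint32_t narrow, uint64_t heap_base, unsigned shift) {
      if (narrow == 0) return 0;  // a narrow null stays null (see immN0)
      return heap_base + (static_cast<uint64_t>(narrow) << shift);
    }

    int main() {
      // Unscaled mode: zero base and zero shift, narrow value == address.
      assert(decode_narrow_oop(0x1000, 0, 0) == 0x1000);
      // Heap-based mode: a non-zero base is added during the decode.
      assert(decode_narrow_oop(0x1000, 0x800000000ULL, 0) == 0x800001000ULL);
      assert(decode_narrow_oop(0, 0x800000000ULL, 3) == 0);
      return 0;
    }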
-+ -+// ============================================================================ -+// Memory (Load/Store) Instructions -+ -+// Load Instructions + -+// Load Byte (8 bit signed) -+instruct loadB(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() +%{ -+ match(Set dst (LoadB mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB" %} -+ -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit signed) into long -+instruct loadB2L(iRegLNoSp dst, memory mem) ++operand iRegP_R12() +%{ -+ match(Set dst (ConvI2L (LoadB mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} -+ -+ ins_encode %{ -+ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit unsigned) -+instruct loadUB(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() +%{ -+ match(Set dst (LoadUB mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} -+ -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Byte (8 bit unsigned) into long -+instruct loadUB2L(iRegLNoSp dst, memory mem) ++operand iRegP_R14() +%{ -+ match(Set dst (ConvI2L (LoadUB mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} -+ -+ ins_encode %{ -+ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short (16 bit signed) -+instruct loadS(iRegINoSp dst, memory mem) ++operand iRegP_R15() +%{ -+ match(Set dst (LoadS mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS" %} -+ -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short (16 bit signed) into long -+instruct loadS2L(iRegLNoSp dst, memory mem) ++operand iRegP_R16() +%{ -+ match(Set dst (ConvI2L (LoadS mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} -+ -+ ins_encode %{ -+ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Char (16 bit unsigned) -+instruct loadUS(iRegINoSp dst, memory mem) ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() +%{ -+ match(Set dst (LoadUS mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} -+ -+ ins_encode %{ -+ 
__ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Short/Char (16 bit unsigned) into long -+instruct loadUS2L(iRegLNoSp dst, memory mem) ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() +%{ -+ match(Set dst (ConvI2L (LoadUS mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} -+ -+ ins_encode %{ -+ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit signed) -+instruct loadI(iRegINoSp dst, memory mem) ++// Integer 64 bit Register not Special ++operand iRegNNoSp() +%{ -+ match(Set dst (LoadI mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI" %} -+ -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit signed) into long -+instruct loadI2L(iRegLNoSp dst, memory mem) ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() +%{ -+ match(Set dst (ConvI2L (LoadI mem))); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} -+ -+ ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Integer (32 bit unsigned) into long -+instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++// Long 64 bit Register R10 only ++operand iRegL_R10() +%{ -+ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(iload_reg_mem); ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Long (64 bit signed) -+instruct loadL(iRegLNoSp dst, memory mem) ++// Float Register ++// Float register operands ++operand fRegF() +%{ -+ match(Set dst (LoadL mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# int, #@loadL" %} -+ -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); + -+ ins_pipe(iload_reg_mem); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Range -+instruct loadRange(iRegINoSp dst, memory mem) ++// Double Register ++// Double register operands ++operand fRegD() +%{ -+ match(Set dst (LoadRange mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); + -+ ins_pipe(iload_reg_mem); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Pointer -+instruct loadP(iRegPNoSp dst, memory mem) ++// Java 
Thread Register ++operand javaThread_RegP(iRegP reg) +%{ -+ match(Set dst (LoadP mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. ++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Compressed Pointer -+instruct loadN(iRegNNoSp dst, memory mem) ++operand indOffI(iRegP reg, immIOffset off) +%{ -+ match(Set dst (LoadN mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Klass Pointer -+instruct loadKlass(iRegPNoSp dst, memory mem) ++operand indOffL(iRegP reg, immLOffset off) +%{ -+ match(Set dst (LoadKlass mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} -+ -+ ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Narrow Klass Pointer -+instruct loadNKlass(iRegNNoSp dst, memory mem) ++operand indirectN(iRegN reg) +%{ -+ match(Set dst (LoadNKlass mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} -+ -+ ins_encode %{ -+ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); + %} -+ -+ ins_pipe(iload_reg_mem); +%} + -+// Load Float -+instruct loadF(fRegF dst, memory mem) ++operand indOffIN(iRegN reg, immIOffset off) +%{ -+ match(Set dst (LoadF mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "flw $dst, $mem\t# float, #@loadF" %} -+ -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(fp_load_mem_s); +%} + -+// Load Double -+instruct loadD(fRegD dst, memory mem) ++operand indOffLN(iRegN reg, immLOffset off) +%{ -+ match(Set dst (LoadD mem)); -+ -+ ins_cost(LOAD_COST); -+ format %{ "fld 
$dst, $mem\t# double, #@loadD" %} -+ -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); + %} -+ -+ ins_pipe(fp_load_mem_d); +%} + -+// Load Int Constant -+instruct loadConI(iRegINoSp dst, immI src) ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# int, #@loadConI" %} ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} + -+ ins_encode(riscv_enc_li_imm(dst, src)); + -+ ins_pipe(ialu_imm); ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Long Constant -+instruct loadConL(iRegLNoSp dst, immL src) ++operand stackSlotF(sRegF reg) +%{ -+ match(Set dst src); -+ -+ ins_cost(ALU_COST); -+ format %{ "li $dst, $src\t# long, #@loadConL" %} -+ -+ ins_encode(riscv_enc_li_imm(dst, src)); -+ -+ ins_pipe(ialu_imm); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Pointer Constant -+instruct loadConP(iRegPNoSp dst, immP con) ++operand stackSlotD(sRegD reg) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} -+ -+ ins_encode(riscv_enc_mov_p(dst, con)); -+ -+ ins_pipe(ialu_imm); ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} +%} + -+// Load Null Pointer Constant -+instruct loadConP0(iRegPNoSp dst, immP0 con) ++operand stackSlotL(sRegL reg) +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} + -+ ins_encode(riscv_enc_mov_zero(dst)); ++// Special operand allowing long args to int ops to be truncated for free + -+ ins_pipe(ialu_imm); -+%} ++operand iRegL2I(iRegL reg) %{ + -+// Load 
Pointer Constant One -+instruct loadConP1(iRegPNoSp dst, immP_1 con) -+%{ -+ match(Set dst con); ++ op_cost(0); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++ match(ConvL2I reg); + -+ ins_encode(riscv_enc_mov_p1(dst)); ++ format %{ "l2i($reg)" %} + -+ ins_pipe(ialu_imm); ++ interface(REG_INTER) +%} + -+// Load Poll Page Constant -+instruct loadConPollPage(iRegPNoSp dst, immPollPage con) -+%{ -+ match(Set dst con); + -+ ins_cost(ALU_COST * 6); -+ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. + -+ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. + -+ ins_pipe(ialu_imm); -+%} + -+// Load Byte Map Base Constant -+instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() +%{ -+ match(Set dst con); -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} ++ match(Bool); + -+ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ format %{ "" %} + -+ ins_pipe(ialu_imm); ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+// Load Narrow Pointer Constant -+instruct loadConN(iRegNNoSp dst, immN con) ++// used for unsigned integral comparisons ++operand cmpOpU() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 4); -+ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} -+ -+ ins_encode(riscv_enc_mov_n(dst, con)); ++ match(Bool); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); ++ %} +%} + -+// Load Narrow Null Pointer Constant -+instruct loadConN0(iRegNNoSp dst, immN0 con) ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} -+ -+ ins_encode(riscv_enc_mov_zero(dst)); ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ 
interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} +%} + -+// Load Narrow Klass Constant -+instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ++operand cmpOpULtGe() +%{ -+ match(Set dst con); -+ -+ ins_cost(ALU_COST * 6); -+ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} -+ -+ ins_encode(riscv_enc_mov_nk(dst, con)); ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); + -+ ins_pipe(ialu_imm); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); ++ %} +%} + -+// Load Float Constant -+instruct loadConF(fRegF dst, immF con) %{ -+ match(Set dst con); -+ -+ ins_cost(LOAD_COST); -+ format %{ -+ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" -+ %} ++operand cmpOpUEqNeLeGt() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); + -+ ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); + %} -+ -+ ins_pipe(fp_load_constant_s); +%} + -+instruct loadConF0(fRegF dst, immF0 con) %{ -+ match(Set dst con); + -+ ins_cost(XFER_COST); ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); + -+ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} + -+ ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); -+ %} ++// Special Registers + -+ ins_pipe(fp_load_constant_s); ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); +%} + -+// Load Double Constant -+instruct loadConD(fRegD dst, immD con) %{ -+ match(Set dst con); ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. + -+ ins_cost(LOAD_COST); -+ format %{ -+ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" -+ %} ++// memory is used to define read/write location for load/store ++// instruction defs. 
we can turn a memory op into an Address + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); -+ %} ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); + -+ ins_pipe(fp_load_constant_d); -+%} ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. + -+instruct loadConD0(fRegD dst, immD0 con) %{ -+ match(Set dst con); ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); + -+ ins_cost(XFER_COST); ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. + -+ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 + -+ ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); -+ %} ++// Integer ALU reg operation ++pipeline %{ + -+ ins_pipe(fp_load_constant_d); ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); +%} + -+// Store Instructions -+// Store CMS card-mark Immediate -+instruct storeimmCM0(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); -+ predicate(unnecessary_storestore(n)); ++// We don't use an actual pipeline model so don't care about resources ++// or description. 
we do use pipeline classes to introduce fixed ++// latencies + -+ ins_cost(STORE_COST); -+ format %{ "storestore (elided)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0" %} ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine + -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div + -+ ins_pipe(istore_mem); -+%} ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); + -+// Store CMS card-mark Immediate with intervening StoreStore -+// needed when using CMS with no conditional card marking -+instruct storeimmCM0_ordered(immI0 zero, memory mem) -+%{ -+ match(Set mem (StoreCM mem zero)); ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline + -+ ins_cost(ALU_COST + STORE_COST); -+ format %{ "membar(StoreStore)\n\t" -+ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); + -+ ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. + -+ ins_pipe(istore_mem); ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Byte -+instruct storeB(iRegIorL2I src, memory mem) ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) +%{ -+ match(Set mem (StoreB mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb $src, $mem\t# byte, #@storeB" %} -+ -+ ins_encode %{ -+ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmB0(immI0 zero, memory mem) ++pipe_class fp_uop_s(fRegF dst, fRegF src) +%{ -+ match(Set mem (StoreB mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} -+ -+ ins_encode %{ -+ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Char/Short -+instruct storeC(iRegIorL2I src, memory mem) ++pipe_class fp_uop_d(fRegD dst, fRegD src) +%{ -+ match(Set mem (StoreC mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh $src, $mem\t# short, #@storeC" %} -+ -+ ins_encode %{ -+ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmC0(immI0 zero, memory mem) ++pipe_class fp_d2f(fRegF dst, fRegD src) +%{ -+ match(Set mem (StoreC mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} -+ -+ ins_encode %{ -+ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ 
dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Integer -+instruct storeI(iRegIorL2I src, memory mem) ++pipe_class fp_f2d(fRegD dst, fRegF src) +%{ -+ match(Set mem(StoreI mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# int, #@storeI" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeimmI0(immI0 zero, memory mem) ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) +%{ -+ match(Set mem(StoreI mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} -+ -+ ins_encode %{ -+ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Long (64 bit signed) -+instruct storeL(iRegL src, memory mem) ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) +%{ -+ match(Set mem (StoreL mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# long, #@storeL" %} -+ -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Long (64 bit signed) -+instruct storeimmL0(immL0 zero, memory mem) ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) +%{ -+ match(Set mem (StoreL mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} -+ -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Pointer -+instruct storeP(iRegP src, memory mem) ++pipe_class fp_l2f(fRegF dst, iRegL src) +%{ -+ match(Set mem (StoreP mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd $src, $mem\t# ptr, #@storeP" %} -+ -+ ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Pointer -+instruct storeimmP0(immP0 zero, memory mem) ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) +%{ -+ match(Set mem (StoreP mem zero)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} -+ -+ ins_encode %{ -+ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Compressed Pointer -+instruct storeN(iRegN src, memory mem) ++pipe_class fp_d2l(iRegLNoSp dst, fRegD src) +%{ -+ match(Set mem (StoreN mem src)); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} -+ -+ ins_pipe(istore_reg_mem); ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) +%{ -+ match(Set mem (StoreN mem zero)); -+ predicate(Universe::narrow_oop_base() == NULL && -+ Universe::narrow_klass_base() == NULL); -+ -+ ins_cost(STORE_COST); -+ format %{ "sw rheapbase, $mem\t# 
compressed ptr (rheapbase==0), #@storeImmN0" %} -+ -+ ins_encode %{ -+ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_pipe(istore_reg_mem); ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Float -+instruct storeF(fRegF src, memory mem) ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) +%{ -+ match(Set mem (StoreF mem src)); ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_pipe(fp_store_reg_s); ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; +%} + -+// Store Double -+instruct storeD(fRegD src, memory mem) ++pipe_class fp_load_constant_s(fRegF dst) +%{ -+ match(Set mem (StoreD mem src)); ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++pipe_class fp_load_constant_d(fRegD dst) ++%{ ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} + -+ ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++pipe_class fp_load_mem_s(fRegF dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(fp_store_reg_d); ++pipe_class fp_load_mem_d(fRegD dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// Store Compressed Klass Pointer -+instruct storeNKlass(iRegN src, memory mem) ++pipe_class fp_store_reg_s(fRegF src, memory mem) +%{ -+ match(Set mem (StoreNKlass mem src)); ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(STORE_COST); -+ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++pipe_class fp_store_reg_d(fRegD src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); -+ %} ++//------- Integer ALU operations -------------------------- + -+ ins_pipe(istore_reg_mem); ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. 
ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; +%} + -+// ============================================================================ -+// Atomic operation instructions -+// -+// Intel and SPARC both implement Ideal Node LoadPLocked and -+// Store{PIL}Conditional instructions using a normal load for the -+// LoadPLocked and a CAS for the Store{PIL}Conditional. -+// -+// The ideal code appears only to use LoadPLocked/storePConditional as a -+// pair to lock object allocations from Eden space when not using -+// TLABs. -+// -+// There does not appear to be a Load{IL}Locked Ideal Node and the -+// Ideal code appears to use Store{IL}Conditional as an alias for CAS -+// and to use StoreIConditional only for 32-bit and StoreLConditional -+// only for 64-bit. -+// -+// We implement LoadPLocked and storePConditional instructions using, -+// respectively the RISCV hw load-reserve and store-conditional -+// instructions. Whereas we must implement each of -+// Store{IL}Conditional using a CAS which employs a pair of -+// instructions comprising a load-reserve followed by a -+// store-conditional. ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+// Locked-load (load reserved) of the current heap-top -+// used when updating the eden heap top -+// implemented using lr_d on RISCV64 -+instruct loadPLocked(iRegPNoSp dst, indirect mem) ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) +%{ -+ match(Set dst (LoadPLocked mem)); ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_cost(ALU_COST * 2 + LOAD_COST); ++// Integer ALU reg immediate operation ++// E.g. ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) ++%{ ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); -+ __ lr_d($dst$$Register, t0, Assembler::aq); -+ %} ++//------- Multiply pipeline operations -------------------- + -+ ins_pipe(pipe_serial); ++// Multiply reg-reg ++// E.g. MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; +%} + -+// Conditional-store of the updated heap-top. -+// Used during allocation of the shared heap. -+// implemented using sc_d on RISCV. -+instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++// E.g. 
MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); -+ -+ ins_cost(ALU_COST * 2 + STORE_COST); -+ -+ format %{ -+ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" -+ %} ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} + -+ ins_encode %{ -+ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); -+ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); -+ %} ++//------- Divide pipeline operations -------------------- + -+ ins_pipe(pipe_serial); ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; +%} + -+// storeLConditional is used by PhaseMacroExpand::expand_lock_node -+// when attempting to rebias a lock towards the current thread. -+instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) +%{ -+ match(Set cr (StoreLConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} + -+ format %{ -+ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" -+ %} ++//------- Load pipeline operations ------------------------ + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) ++%{ ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_pipe(pipe_slow); ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// storeIConditional also has acquire semantics, for no better reason -+// than matching storeLConditional. -+instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) -+%{ -+ match(Set cr (StoreIConditional mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); -+ -+ format %{ -+ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" -+ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" -+ %} -+ -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); -+ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); -+ %} ++//------- Control transfer pipeline operations ------------ + -+ ins_pipe(pipe_slow); ++// Store - zr, mem ++// E.g. 
SD zr, mem ++pipe_class istore_mem(memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; +%} + -+// standard CompareAndSwapX when we are using barriers -+// these have higher priority than the rules selected by a predicate -+instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) +%{ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++// Store - reg, reg ++// E.g. SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++//------- Store pipeline operations ----------------------- + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" -+ %} ++// Branch ++pipe_class pipe_branch() ++%{ ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Branch ++pipe_class pipe_branch_reg(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_pipe(pipe_slow); ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; +%} + -+instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) +%{ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() ++%{ ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++pipe_class pipe_slow() ++%{ ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapS" -+ %} ++// Empty pipeline class ++pipe_class pipe_class_empty() ++%{ ++ single_instruction; ++ fixed_latency(0); ++%} + -+ ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Default pipeline class. ++pipe_class pipe_class_default() ++%{ ++ single_instruction; ++ fixed_latency(2); ++%} + -+ ins_pipe(pipe_slow); ++// Pipeline class for compares. ++pipe_class pipe_class_compare() ++%{ ++ single_instruction; ++ fixed_latency(16); +%} + -+instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() +%{ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ single_instruction; ++ fixed_latency(16); ++%} + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++// Pipeline class for call. ++pipe_class pipe_class_call() ++%{ ++ single_instruction; ++ fixed_latency(100); ++%} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" -+ %} ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; ++%} ++%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. + -+ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++// ============================================================================ ++// Memory (Load/Store) Instructions + -+ ins_pipe(pipe_slow); -+%} ++// Load Instructions + -+instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set dst (LoadB mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadB mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) +%{ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set dst (LoadUB mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} + -+ format %{ -+ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+// alternative CompareAndSwapX when we are eliding barriers -+instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (ConvI2L (LoadUB mem))); + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadS mem)); + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, -+ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadS mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ match(Set dst (LoadUS mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapLAcq" ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set dst (ConvI2L (LoadUS mem))); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ match(Set dst (LoadI mem)); + -+ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} + -+ format %{ -+ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" -+ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); -+ -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+// Sundry CAS operations. Note that release is always true, -+// regardless of the memory ordering of the CAS. This is because we -+// need the volatile case to be sequentially consistent but there is -+// no trailing StoreLoad barrier emitted by C2. Unfortunately we -+// can't check the type of memory ordering here, so we always emit a -+// sc_d(w) with rl bit set. 
-+instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (ConvI2L (LoadI mem))); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) +%{ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadL mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) +%{ -+ 
match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadRange mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadP mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Compressed Pointer ++instruct loadN(iRegNNoSp dst, memory mem) +%{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadN mem)); + -+ format %{ -+ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadKlass mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, 
Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst (LoadNKlass mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} + + ins_encode %{ -+ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(iload_reg_mem); +%} + -+instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Float ++instruct loadF(fRegF dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadF mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_mem_s); +%} + -+instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Double ++instruct loadD(fRegD dst, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); -+ -+ effect(TEMP_DEF res); ++ match(Set dst (LoadD mem)); + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" -+ %} ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} + + ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); 
++ ins_pipe(fp_load_mem_d); +%} + -+instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set dst src); + -+ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ins_encode(riscv_enc_li_imm(dst, src)); + -+ effect(TEMP_DEF res); ++ ins_pipe(ialu_imm); ++%} + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" -+ %} ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) ++%{ ++ match(Set dst src); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# long, #@loadConL" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set dst con); + -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ins_encode(riscv_enc_mov_p(dst, con)); + -+ effect(TEMP_DEF res); ++ ins_pipe(ialu_imm); ++%} + -+ format %{ -+ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" -+ %} ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); + -+ ins_encode %{ -+ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) +%{ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapB" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++ ins_encode(riscv_enc_mov_p1(dst)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapS(iRegINoSp 
res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) +%{ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ match(Set dst con); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapS" -+ %} ++ ins_pipe(ialu_imm); ++%} + -+ ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); -+ %} ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++%{ ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} + -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) +%{ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapI" -+ %} ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_n(dst, con)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) +%{ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapL" -+ %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_zero(dst)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) +%{ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set 
dst con); + -+ format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapN" -+ %} ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} + -+ ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); -+ %} ++ ins_encode(riscv_enc_mov_nk(dst, con)); + -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_imm); +%} + -+instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) -+%{ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); + ++ ins_cost(LOAD_COST); + format %{ -+ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapP" ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" + %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_s); +%} + -+instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); + -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ins_cost(XFER_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapBAcq" -+ %} ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} + + ins_encode %{ -+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_s); +%} + -+instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, -+ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); -+ -+ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); + ++ ins_cost(LOAD_COST); + format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" + %} + + ins_encode %{ 
-+ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_d); +%} + -+instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) -+%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); + -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ins_cost(XFER_COST); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapIAcq" -+ %} ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(fp_load_constant_d); +%} + -+instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapLAcq" -+ %} ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(istore_mem); +%} + -+instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ match(Set mem (StoreCM mem zero)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapNAcq" -+ %} ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); 
++ ins_pipe(istore_mem); +%} + -+instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ -+ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ match(Set mem (StoreB mem src)); + -+ format %{ -+ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" -+ "# $res == 1 when success, #@weakCompareAndSwapPAcq" -+ %} ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} + + ins_encode %{ -+ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, -+ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++instruct storeimmB0(immI0 zero, memory mem) +%{ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreB mem zero)); + -+ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} + + ins_encode %{ -+ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) +%{ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreC mem src)); + -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} + + ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++instruct storeimmC0(immI0 zero, memory mem) +%{ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreC mem zero)); + -+ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} + + ins_encode %{ -+ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) +%{ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem(StoreI mem src)); + -+ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} + + ins_encode %{ -+ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setIAcq(indirect mem, iRegI newv, 
iRegINoSp prev) ++instruct storeimmI0(immI0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetI mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem(StoreI mem zero)); + -+ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} + + ins_encode %{ -+ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetL mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreL mem src)); + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} + + ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetN mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreL mem zero)); + -+ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} + + ins_encode %{ -+ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++// Store Pointer ++instruct storeP(iRegP src, memory mem) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set prev (GetAndSetP mem newv)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreP mem src)); + -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} + + ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreP mem zero)); + -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} + + ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_mem); +%} + -+instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) +%{ -+ 
predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreN mem src)); + -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} + + ins_encode %{ -+ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) +%{ -+ match(Set newval (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); + -+ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} + + ins_encode %{ -+ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++// Store Float ++instruct storeF(fRegF src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddL mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreF mem src)); + -+ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} + + ins_encode %{ -+ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(fp_store_reg_s); +%} + -+instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++// Store Double ++instruct storeD(fRegD src, memory mem) +%{ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreD mem src)); + -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} ++ ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} + + ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(fp_store_reg_d); +%} + -+instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set mem (StoreNKlass mem src)); + -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} + + ins_encode %{ -+ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(istore_reg_mem); +%} + -+instruct get_and_addIi(indirect 
mem, iRegINoSp newval, immIAdd incr) ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. ++ ++ ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) +%{ -+ match(Set newval (GetAndAddI mem incr)); ++ match(Set dst (LoadPLocked mem)); + -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 2 + LOAD_COST); + -+ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} + + ins_encode %{ -+ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); + %} + + ins_pipe(pipe_serial); +%} + -+instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV64. ++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) +%{ -+ predicate(n->as_LoadStore()->result_not_used()); -+ -+ match(Set dummy (GetAndAddI mem incr)); ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(ALU_COST * 2 + STORE_COST); + -+ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} + + ins_encode %{ -+ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); + %} + + ins_pipe(pipe_serial); +%} + -+instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. We ++// must use the acquire form of cmpxchg in order to guarantee acquire ++// semantics in this case. 
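As background for the load-reserve/store-conditional pairing described in the comment above, here is an illustrative sketch (not part of this patch, and independent of the HotSpot MacroAssembler API) of how a strong 64-bit compare-and-swap is composed from the RV64A lr.d/sc.d instructions; the function name, operand constraints, and aq/rl placement below are assumptions made for the example only.

    #include <cstdint>

    // Illustrative only: strong CAS built from an lr.d/sc.d pair on RV64A.
    // lr.d.aq takes the reservation with acquire semantics; sc.d.rl writes
    // zero to its result register on success and non-zero if the reservation
    // was lost, in which case the loop retries.
    static inline bool cas_u64(volatile uint64_t* addr,
                               uint64_t expected,
                               uint64_t desired) {
      uint64_t observed;
      uint64_t fail;
      __asm__ volatile(
        "1: lr.d.aq  %[obs], (%[ptr])         \n"   // load-reserved, acquire
        "   bne      %[obs], %[exp], 2f       \n"   // value differs: give up
        "   sc.d.rl  %[fail], %[des], (%[ptr])\n"   // store-conditional, release
        "   bnez     %[fail], 1b              \n"   // reservation lost: retry
        "2:"
        : [obs] "=&r"(observed), [fail] "=&r"(fail)
        : [ptr] "r"(addr), [exp] "r"(expected), [des] "r"(desired)
        : "memory");
      return observed == expected;
    }

A weak CAS follows the same shape but may simply report failure when the store-conditional fails instead of looping, which is the behavioural difference the weak* rules later in this file rely on.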
++instruct storeLConditional(indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddL mem incr)); ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} + + ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddL mem incr)); ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. ++instruct storeIConditional(indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} + + ins_encode %{ -+ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+ match(Set newval (GetAndAddL mem incr)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapB" ++ %} + + ins_encode %{ -+ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ match(Set dummy (GetAndAddL mem incr)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); ++ effect(TEMP_DEF res, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + -+ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} + + ins_encode %{ -+ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapI" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapL" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) +%{ -+ predicate(needs_acquiring_load_exclusive(n)); -+ -+ match(Set newval (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) +%{ -+ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); -+ -+ match(Set dummy (GetAndAddI mem incr)); -+ -+ ins_cost(ALU_COST); ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); + -+ ins_encode %{ -+ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" + %} + -+ ins_pipe(pipe_serial); ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// ============================================================================ -+// Arithmetic Instructions -+// ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+// Integer Addition ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); + -+// TODO -+// these currently employ operations which do not set CR and hence are -+// not flagged as killing CR but we would like to isolate the cases -+// where we want to set flags from those where we don't. need to work -+// out how to do that. -+instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (AddI src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); + -+ ins_cost(ALU_COST); -+ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ -+ match(Set dst (AddI src1 src2)); -+ -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ int32_t con = (int32_t)$src2$$constant; -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); -+ %} ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); + -+instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ -+ match(Set dst (AddI (ConvL2I src1) src2)); ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ addiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Pointer Addition -+instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ -+ match(Set dst (AddP src1 src2)); ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" + %} + -+ ins_pipe(ialu_reg_reg); -+%} ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); + -+// If we shift more than 32 bits, we need not convert I2L. 
-+instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ -+ match(Set dst (LShiftL (ConvI2L src) scale)); -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode %{ -+ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); -+ %} ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_reg_shift); -+%} ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); + -+// Pointer Immediate Addition -+// n.b. this needs to be more expensive than using an indirect memory -+// operand -+instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ -+ match(Set dst (AddP src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" + %} + -+ ins_pipe(ialu_reg_imm); -+%} ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); + -+// Long Addition -+instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++ ins_pipe(pipe_slow); ++%} + -+ ins_encode %{ -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_pipe(ialu_reg_reg); -+%} ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); + -+// No constant pool entries requiredLong Immediate Addition. -+instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AddL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); + -+ ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ add(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapPAcq" + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Integer Subtraction -+instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (SubI src1 src2)); ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); +%} + -+// Immediate Subtraction -+instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ -+ match(Set dst (SubI src1 src2)); ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. ++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} + + ins_encode %{ -+ // src2 is imm, so actually call the addiw -+ __ subw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Long Subtraction -+instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_encode %{ -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+// No constant pool entries requiredLong Immediate Subtraction. 
-+instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ -+ match(Set dst (SubL src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} + + ins_encode %{ -+ // src2 is imm, so actually call the addi -+ __ sub(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ $src2$$constant); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(pipe_slow); +%} + -+// Integer Negation (special case for sub) ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ -+ match(Set dst (SubI zero src)); -+ ins_cost(ALU_COST); -+ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" ++ %} + + ins_encode %{ -+ // actually call the subw -+ __ negw(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Negation ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ -+ match(Set dst (SubL zero src)); -+ ins_cost(ALU_COST); -+ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} + + ins_encode %{ -+ // actually call the sub -+ __ neg(as_Register($dst$$reg), -+ as_Register($src$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+// Integer Multiply ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (MulI src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} + -+ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ // riscv64 mulw will sign-extension to high 32 bits in dst reg -+ __ mulw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ 
as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(imul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Multiply ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (MulL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mul $dst, $src1, $src2\t#@mulL" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} + + ins_encode %{ -+ __ mul(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) +%{ -+ match(Set dst (MulHiL src1 src2)); -+ ins_cost(IMUL_COST); -+ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} + + ins_encode %{ -+ __ mulh(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} + -+ ins_pipe(lmul_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Integer Divide ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (DivI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_divw(dst, src1, src2)); -+ ins_pipe(idiv_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); + -+instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ -+ match(Set dst (URShiftI (RShiftI src1 div1) div2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, 
#@compareAndExchangeSAcq" ++ %} + + ins_encode %{ -+ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); + %} -+ ins_pipe(ialu_reg_shift); ++ ++ ins_pipe(pipe_slow); +%} + -+// Long Divide ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (DivL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "div $dst, $src1, $src2\t#@divL" %} ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_div(dst, src1, src2)); -+ ins_pipe(ldiv_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ -+ match(Set dst (URShiftL (RShiftL src1 div1) div2)); -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ++ effect(TEMP_DEF res); + -+ ins_encode %{ -+ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" + %} -+ ins_pipe(ialu_reg_shift); -+%} -+ -+// Integer Remainder + -+instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (ModI src1 src2)); -+ ins_cost(IDIVSI_COST); -+ format %{ "remw $dst, $src1, $src2\t#@modI" %} ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} + -+ ins_encode(riscv_enc_modw(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_slow); +%} + -+// Long Remainder ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (ModL src1 src2)); -+ ins_cost(IDIVDI_COST); -+ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); + -+ ins_encode(riscv_enc_mod(dst, src1, src2)); -+ ins_pipe(ialu_reg_reg); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Integer Shifts ++ effect(TEMP_DEF res); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} + + ins_encode %{ -+ __ sllw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Left Immediate -+instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst 
(LShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 5 bits of the I-immediate field for RV32I -+ __ slliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); -+ %} ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_shift); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} ++ effect(TEMP_DEF res); + -+ ins_encode %{ -+ __ srlw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" + %} + -+ ins_pipe(ialu_reg_reg_vshift); -+%} -+ -+// Shift Right Logical Immediate -+instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (URShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} -+ + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srliw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + -+ ins_pipe(ialu_reg_reg_vshift); -+%} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); + -+// Shift Right Arithmetic Immediate -+instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ -+ match(Set dst (RShiftI src1 src2)); -+ ins_cost(ALU_COST); -+ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" ++ %} + + ins_encode %{ -+ // riscv will sign-ext dst high 32 bits -+ __ sraiw(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x1f); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, 
$res$$Register); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Long Shifts ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+// Shift Left Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + -+ ins_cost(ALU_COST); -+ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapB" ++ %} + + ins_encode %{ -+ __ sll(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Left Immediate -+instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (LShiftL src1 src2)); ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapS" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ slli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Logical Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ 
"xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapI" ++ %} + + ins_encode %{ -+ __ srl(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Logical Immediate -+instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (URShiftL src1 src2)); ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapL" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// A special-case pattern for card table stores. -+instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ -+ match(Set dst (URShiftL (CastP2X src1) src2)); ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapN" ++ %} + + ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srli(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Register -+// In RV64I, only the low 5 bits of src2 are considered for the shift amount -+instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + -+ ins_cost(ALU_COST); -+ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 
1\t# $res == 1 when success, #@weakCompareAndSwapP" ++ %} + + ins_encode %{ -+ __ sra(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg_reg_vshift); ++ ins_pipe(pipe_slow); +%} + -+// Shift Right Arithmetic Immediate -+instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ -+ match(Set dst (RShiftL src1 src2)); ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST); -+ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); + -+ ins_encode %{ -+ // the shift amount is encoded in the lower -+ // 6 bits of the I-immediate field for RV64I -+ __ srai(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (unsigned) $src2$$constant & 0x3f); -+ %} ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); + -+ ins_pipe(ialu_reg_shift); -+%} ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + -+instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ -+ match(Set dst (XorI src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + -+instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ -+ match(Set dst (XorL src1 m1)); -+ ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_slow); +%} + ++instruct weakCompareAndSwapIAcq(iRegINoSp 
res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+// ============================================================================ -+// Floating Point Arithmetic Instructions ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); + -+instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (AddF src1 src2)); ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ %} + + ins_encode %{ -+ __ fadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (AddD src1 src2)); ++instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ %} + + ins_encode %{ -+ __ fadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_slow); +%} + -+instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (SubF src1 src2)); ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ %} + + ins_encode %{ -+ __ fsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_slow); +%} + -+instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (SubD src1 src2)); ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP 
newval) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "xori $res, $res, 1\t# $res == 1 when success, #@weakCompareAndSwapPAcq" ++ %} + + ins_encode %{ -+ __ fsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ __ xori($res$$Register, $res$$Register, 1); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_slow); +%} + -+instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MulF src1 src2)); ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetI mem newv)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} + + ins_encode %{ -+ __ fmul_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MulD src1 src2)); ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ match(Set prev (GetAndSetL mem newv)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} + + ins_encode %{ -+ __ fmul_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 + src3 -+instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary src1 src2))); ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetN mem newv)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} + + ins_encode %{ -+ __ fmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 + src3 -+instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary src1 src2))); ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ match(Set prev (GetAndSetP mem newv)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} + + ins_encode %{ -+ 
__ fmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 - src3 -+instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ++ match(Set prev (GetAndSetI mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} + + ins_encode %{ -+ __ fmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// src1 * src2 - src3 -+instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} + + ins_encode %{ -+ __ fmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); -+ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ++ match(Set prev (GetAndSetN mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} + + ins_encode %{ -+ __ fnmsub_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 + src3 -+instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); -+ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} + + 
ins_encode %{ -+ __ fnmsub_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); -+ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+ ins_cost(FMUL_SINGLE_COST); -+ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addL" %} + + ins_encode %{ -+ __ fnmadd_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// -src1 * src2 - src3 -+instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ -+ predicate(UseFMA); -+ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); -+ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(FMUL_DOUBLE_COST); -+ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} + + ins_encode %{ -+ __ fnmadd_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), -+ as_FloatRegister($src3$$reg)); ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_serial); +%} + -+// Math.max(FF)F -+instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MaxF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" -+ "fmax.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+// Math.min(FF)F -+instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (MinF src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); -+ format %{ 
"fsflags zr\t#@minF_reg_reg\n\t" -+ "fmin.s $dst, $src1, $src2\n\t" -+ "flt.s zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.s $dst, $src1, $src2" %} ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_s); ++ ins_pipe(pipe_serial); +%} + -+// Math.max(DD)D -+instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MaxD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" -+ "fmax.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+// Math.min(DD)D -+instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (MinD src1 src2)); -+ effect(TEMP_DEF dst, USE src1, USE src2); ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); -+ format %{ "fsflags zr\t#@minD_reg_reg\n\t" -+ "fmin.d $dst, $src1, $src2\n\t" -+ "flt.d zr, $src1, $src2\n\t" -+ "frflags t0\n\t" -+ "beqz t0, Ldone\n\t" -+ "fadd.d $dst, $src1, $src2" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} + + ins_encode %{ -+ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_dop_reg_reg_d); ++ ins_pipe(pipe_serial); +%} + -+instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ -+ match(Set dst (DivF src1 src2)); ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} + + ins_encode %{ -+ __ fdiv_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_div_s); ++ ins_pipe(pipe_serial); +%} + -+instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ -+ match(Set dst (DivD src1 src2)); ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); + -+ ins_cost(FDIV_COST); -+ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} ++ match(Set dummy 
(GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} + + ins_encode %{ -+ __ fdiv_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src1$$reg), -+ as_FloatRegister($src2$$reg)); ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_div_d); ++ ins_pipe(pipe_serial); +%} + -+instruct negF_reg_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (NegF src)); ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} + + ins_encode %{ -+ __ fneg_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_s); ++ ins_pipe(pipe_serial); +%} + -+instruct negD_reg_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (NegD src)); ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} + + ins_encode %{ -+ __ fneg_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_serial); +%} + -+instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (AbsI src)); ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI_reg" %} ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} + + ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2L (AbsI src))); ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "sraiw t0, $src, 0x1f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "subw $dst, $dst, t0\t#@absI2L_reg" %} ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} + + ins_encode %{ -+ __ sraiw(t0, as_Register($src$$reg), 0x1f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absL_reg(iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (AbsL 
src)); ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); + -+ ins_cost(ALU_COST * 3); -+ format %{ "srai t0, $src, 0x3f\n\t" -+ "xorr $dst, $src, t0\n\t" -+ "sub $dst, $dst, t0\t#absL_reg" %} ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} + + ins_encode %{ -+ __ srai(t0, as_Register($src$$reg), 0x3f); -+ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); -+ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_serial); +%} + -+instruct absF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (AbsF src)); ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} -+ ins_encode %{ -+ __ fabs_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); -+ %} ++ match(Set dummy (GetAndAddI mem incr)); + -+ ins_pipe(fp_uop_s); -+%} ++ ins_cost(ALU_COST); + -+instruct absD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (AbsD src)); ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} + -+ ins_cost(XFER_COST); -+ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ fabs_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_uop_d); ++ ins_pipe(pipe_serial); +%} + -+instruct sqrtF_reg(fRegF dst, fRegF src) %{ -+ match(Set dst (SqrtF src)); ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ predicate(needs_acquiring_load_reserved(n)); ++ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} + ins_encode %{ -+ __ fsqrt_s(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_sqrt_s); ++ ins_pipe(pipe_serial); +%} + -+instruct sqrtD_reg(fRegD dst, fRegD src) %{ -+ match(Set dst (SqrtD src)); ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_reserved(n)); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} + -+ ins_cost(FSQRT_COST); -+ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ fsqrt_d(as_FloatRegister($dst$$reg), -+ as_FloatRegister($src$$reg)); ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); + %} + -+ ins_pipe(fp_sqrt_d); ++ ins_pipe(pipe_serial); +%} + -+// Arithmetic Instructions End -+ +// ============================================================================ -+// Logical Instructions ++// Arithmetic Instructions ++// + -+// Register And -+instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (AndI src1 src2)); ++// Integer Addition + -+ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++// TODO ++// these currently employ operations which do 
not set CR and hence are ++// not flagged as killing CR but we would like to isolate the cases ++// where we want to set flags from those where we don't. need to work ++// out how to do that. ++instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ + ins_encode %{ -+ __ andr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35186,31 +33783,49 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate And -+instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (AndI src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ++instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ ++ match(Set dst (AddI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++ + ins_encode %{ -+ __ andi(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ int32_t con = (int32_t)$src2$$constant; ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Or -+instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (OrI src1 src2)); ++instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ ++ match(Set dst (AddI (ConvL2I src1) src2)); + -+ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Pointer Addition ++instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ++ match(Set dst (AddP src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ + ins_encode %{ -+ __ orr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35218,63 +33833,82 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Or -+instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (OrI src1 src2)); -+ -+ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} -+ ++// If we shift more than 32 bits, we need not convert I2L. ++instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ ++ match(Set dst (LShiftL (ConvI2L src) scale)); + ins_cost(ALU_COST); ++ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++ + ins_encode %{ -+ __ ori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(ialu_reg_shift); +%} + -+// Register Xor -+instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ match(Set dst (XorI src1 src2)); ++// Pointer Immediate Addition ++// n.b. 
this needs to be more expensive than using an indirect memory ++// operand ++instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++ ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} + -+ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ++ ins_pipe(ialu_reg_imm); ++%} + ++// Long Addition ++instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++ + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Xor -+instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ -+ match(Set dst (XorI src1 src2)); -+ -+ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} -+ ++// No constant pool entries requiredLong Immediate Addition. ++instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AddL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++ + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register And Long -+instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} ++// Integer Subtraction ++instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ + ins_encode %{ -+ __ andr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35282,31 +33916,33 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate And Long -+instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (AndL src1 src2)); -+ -+ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} ++// Immediate Subtraction ++instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ++ match(Set dst (SubI src1 src2)); + + ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ + ins_encode %{ -+ __ andi(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addiw ++ __ subw(as_Register($dst$$reg), + as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Or Long -+instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (OrL src1 src2)); -+ -+ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} -+ ++// Long Subtraction ++instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "sub $dst, 
$src1, $src2\t#@subL_reg_reg" %} ++ + ins_encode %{ -+ __ orr(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), + as_Register($src2$$reg)); + %} @@ -35314,5301 +33950,4461 @@ index 000000000..137e9b7c7 + ins_pipe(ialu_reg_reg); +%} + -+// Immediate Or Long -+instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (OrL src1 src2)); -+ -+ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} -+ ++// No constant pool entries requiredLong Immediate Subtraction. ++instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ++ match(Set dst (SubL src1 src2)); + ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++ + ins_encode %{ -+ __ ori(as_Register($dst$$reg), ++ Assembler::CompressibleRegion cr(&_masm); ++ // src2 is imm, so actually call the addi ++ __ sub(as_Register($dst$$reg), + as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ $src2$$constant); + %} + + ins_pipe(ialu_reg_imm); +%} + -+// Register Xor Long -+instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ -+ match(Set dst (XorL src1 src2)); -+ -+ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} ++// Integer Negation (special case for sub) + ++instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ ++ match(Set dst (SubI zero src)); + ins_cost(ALU_COST); ++ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ + ins_encode %{ -+ __ xorr(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); ++ // actually call the subw ++ __ negw(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+// Immediate Xor Long -+instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ -+ match(Set dst (XorL src1 src2)); ++// Long Negation + ++instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ ++ match(Set dst (SubL zero src)); + ins_cost(ALU_COST); -+ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} ++ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} + + ins_encode %{ -+ __ xori(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ (int32_t)($src2$$constant)); ++ // actually call the sub ++ __ neg(as_Register($dst$$reg), ++ as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_imm); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// BSWAP Instructions -+ -+instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesI src)); -+ effect(TEMP cr); ++// Integer Multiply + -+ ins_cost(ALU_COST * 13); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ++instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} + ++ //this means 2 word multi, and no sign extend to 64 bits + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ // riscv64 mulw will sign-extension to high 32 bits in dst reg ++ __ mulw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(imul_reg_reg); +%} + -+instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ -+ match(Set dst (ReverseBytesL src)); -+ effect(TEMP cr); ++// Long Multiply + -+ ins_cost(ALU_COST * 29); -+ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ++instruct mulL(iRegLNoSp dst, iRegL src1, 
iRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mul $dst, $src1, $src2\t#@mulL" %} + + ins_encode %{ -+ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ mul(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesUS src)); -+ -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ++instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++%{ ++ match(Set dst (MulHiL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} + + ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ mulh(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(lmul_reg_reg); +%} + -+instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ -+ match(Set dst (ReverseBytesS src)); ++// Integer Divide + -+ ins_cost(ALU_COST * 5); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ++instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ ++ ins_encode(riscv_enc_divw(dst, src1, src2)); ++ ins_pipe(idiv_reg_reg); ++%} ++ ++instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ ++ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} + + ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); + %} -+ -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+// ============================================================================ -+// MemBar Instruction ++// Long Divide + -+instruct load_fence() %{ -+ match(LoadFence); -+ ins_cost(ALU_COST); ++instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "div $dst, $src1, $src2\t#@divL" %} + -+ format %{ "#@load_fence" %} ++ ins_encode(riscv_enc_div(dst, src1, src2)); ++ ins_pipe(ldiv_reg_reg); ++%} ++ ++instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ ++ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); + %} -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct membar_acquire() %{ -+ match(MemBarAcquire); -+ ins_cost(ALU_COST); ++// Integer Remainder + -+ format %{ "#@membar_acquire\n\t" -+ "fence ir iorw" %} ++instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "remw $dst, $src1, $src2\t#@modI" %} + -+ ins_encode %{ -+ __ block_comment("membar_acquire"); -+ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); -+ %} ++ ins_encode(riscv_enc_modw(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_pipe(pipe_serial); ++// Long Remainder ++ ++instruct modL(iRegLNoSp 
dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ ++ ins_encode(riscv_enc_mod(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct membar_acquire_lock() %{ -+ match(MemBarAcquireLock); -+ ins_cost(0); ++// Integer Shifts + -+ format %{ "#@membar_acquire_lock (elided)" %} ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_acquire_lock (elided)"); ++ __ sllw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct store_fence() %{ -+ match(StoreFence); ++// Shift Left Immediate ++instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (LShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@store_fence" %} ++ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} + + ins_encode %{ -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ // the shift amount is encoded in the lower ++ // 5 bits of the I-immediate field for RV32I ++ __ slliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(ialu_reg_shift); +%} + -+instruct membar_release() %{ -+ match(MemBarRelease); ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@membar_release\n\t" -+ "fence iorw ow" %} ++ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_release"); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ srlw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_serial); ++ ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct membar_storestore() %{ -+ match(MemBarStoreStore); ++// Shift Right Logical Immediate ++instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (URShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "MEMBAR-store-store\t#@membar_storestore" %} ++ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} + + ins_encode %{ -+ __ membar(MacroAssembler::StoreStore); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} -+ ins_pipe(pipe_serial); -+%} + -+instruct membar_release_lock() %{ -+ match(MemBarReleaseLock); -+ ins_cost(0); ++ ins_pipe(ialu_reg_shift); ++%} + -+ format %{ "#@membar_release_lock (elided)" %} ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} + + ins_encode %{ -+ __ block_comment("membar_release_lock (elided)"); ++ // riscv will 
sign-ext dst high 32 bits ++ __ sraw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct membar_volatile() %{ -+ match(MemBarVolatile); ++// Shift Right Arithmetic Immediate ++instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (RShiftI src1 src2)); + ins_cost(ALU_COST); -+ -+ format %{ "#@membar_volatile\n\t" -+ "fence iorw iorw"%} ++ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} + + ins_encode %{ -+ __ block_comment("membar_volatile"); -+ __ membar(MacroAssembler::StoreLoad); ++ // riscv will sign-ext dst high 32 bits ++ __ sraiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); + %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(ialu_reg_shift); +%} + -+// ============================================================================ -+// Cast Instructions (Java-level type cast) ++// Long Shifts + -+instruct castX2P(iRegPNoSp dst, iRegL src) %{ -+ match(Set dst (CastX2P src)); ++// Shift Left Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ++ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ __ sll(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct castP2X(iRegLNoSp dst, iRegP src) %{ -+ match(Set dst (CastP2X src)); ++// Shift Left Immediate ++instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (LShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ++ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} + + ins_encode %{ -+ if ($dst$$reg != $src$$reg) { -+ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); -+ } ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ slli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct castPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CastPP dst)); -+ ins_cost(0); ++// Shift Right Logical Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ size(0); -+ format %{ "# castPP of $dst, #@castPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ ins_cost(ALU_COST); ++ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} + -+instruct castII(iRegI dst) -+%{ -+ match(Set dst (CastII dst)); ++ ins_encode %{ ++ __ srl(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ size(0); -+ format %{ "# castII of $dst, #@castII" %} -+ ins_encode(/* empty encoding */); -+ ins_cost(0); -+ ins_pipe(pipe_class_empty); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+instruct checkCastPP(iRegPNoSp dst) -+%{ -+ match(Set dst (CheckCastPP dst)); ++// Shift Right Logical Immediate ++instruct 
urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (URShiftL src1 src2)); + -+ size(0); -+ ins_cost(0); -+ format %{ "# checkcastPP of $dst, #@checkCastPP" %} -+ ins_encode(/* empty encoding */); -+ ins_pipe(pipe_class_empty); -+%} ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} + -+// ============================================================================ -+// Convert Instructions ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} + -+// int to bool -+instruct convI2Bool(iRegINoSp dst, iRegI src) -+%{ -+ match(Set dst (Conv2B src)); ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// A special-case pattern for card table stores. ++instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ ++ match(Set dst (URShiftL (CastP2X src1) src2)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convI2Bool" %} ++ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_shift); +%} + -+// pointer to bool -+instruct convP2Bool(iRegINoSp dst, iRegP src) -+%{ -+ match(Set dst (Conv2B src)); ++// Shift Right Arithmetic Register ++// In RV64I, only the low 6 bits of src2 are considered for the shift amount ++instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "snez $dst, $src\t#@convP2Bool" %} ++ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} + + ins_encode %{ -+ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ sra(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(ialu_reg_reg_vshift); +%} + -+// int <-> long -+ -+instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) -+%{ -+ match(Set dst (ConvI2L src)); ++// Shift Right Arithmetic Immediate ++instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (RShiftL src1 src2)); + + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ++ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ Assembler::CompressibleRegion cr(&_masm); ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srai(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); + %} -+ ins_pipe(ialu_reg); -+%} + -+instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ -+ match(Set dst (ConvL2I src)); ++ ins_pipe(ialu_reg_shift); ++%} + ++instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ ++ match(Set dst (XorI src1 m1)); + ins_cost(ALU_COST); -+ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} ++ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} + + ins_encode %{ -+ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + + 
ins_pipe(ialu_reg); +%} + -+// int to unsigned long (Zero-extend) -+instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) -+%{ -+ match(Set dst (AndL (ConvI2L src) mask)); -+ -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ++instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ ++ match(Set dst (XorL src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} + + ins_encode %{ -+ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg); +%} + -+// float <-> double + -+instruct convF2D_reg(fRegD dst, fRegF src) %{ -+ match(Set dst (ConvF2D src)); ++// ============================================================================ ++// Floating Point Arithmetic Instructions + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} ++instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ fadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2d); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convD2F_reg(fRegF dst, fRegD src) %{ -+ match(Set dst (ConvD2F src)); ++instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (AddD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ __ fadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_d2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// float <-> int -+ -+instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2I src)); ++instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (SubF src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ __ fsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2i); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2F src)); ++instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (SubD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fsub.d $dst, $src1, $src2\t#@subD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_i2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// float <-> long -+ -+instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ -+ match(Set dst (ConvF2L src)); ++instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MulF src1 src2)); + -+ 
ins_cost(XFER_COST); -+ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); ++ __ fmul_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_f2l); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ -+ match(Set dst (ConvL2F src)); ++instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MulD src1 src2)); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmul_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(fp_l2f); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// double <-> int -+ -+instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2I src)); ++// src1 * src2 + src3 ++instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); ++ __ fmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_d2i); ++ ins_pipe(pipe_class_default); +%} + -+instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ -+ match(Set dst (ConvI2D src)); ++// src1 * src2 + src3 ++instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_i2d); ++ ins_pipe(pipe_class_default); +%} + -+// double <-> long -+ -+instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ match(Set dst (ConvD2L src)); ++// src1 * src2 - src3 ++instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); + -+ ins_cost(XFER_COST); -+ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} + + ins_encode %{ -+ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); ++ __ fmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_d2l); ++ ins_pipe(pipe_class_default); +%} + -+instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ -+ match(Set dst (ConvL2D src)); ++// src1 * src2 - src3 ++instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); + 
-+ ins_cost(XFER_COST); -+ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} + + ins_encode %{ -+ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ __ fmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(pipe_class_default); +%} + -+// Convert oop into int for vectors alignment masking -+instruct convP2I(iRegINoSp dst, iRegP src) %{ -+ match(Set dst (ConvL2I (CastP2X src))); ++// -src1 * src2 + src3 ++instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); + -+ ins_cost(ALU_COST * 2); -+ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} + + ins_encode %{ -+ __ zero_extend($dst$$Register, $src$$Register, 32); ++ __ fnmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_default); +%} + -+// Convert compressed oop into int for vectors alignment masking -+// in case of 32bit oops (heap < 4Gb). -+instruct convN2I(iRegINoSp dst, iRegN src) -+%{ -+ predicate(Universe::narrow_oop_shift() == 0); -+ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++// -src1 * src2 + src3 ++instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); + -+ ins_cost(ALU_COST); -+ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} + + ins_encode %{ -+ __ mv($dst$$Register, $src$$Register); ++ __ fnmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_default); +%} + -+// Convert oop pointer into compressed form -+instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodeP src)); -+ ins_cost(ALU_COST); -+ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} -+ ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ encode_heap_oop(d, s); -+ %} -+ ins_pipe(ialu_reg); -+%} ++// -src1 * src2 - src3 ++instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); + -+instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && -+ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop(d, s); ++ __ fnmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ 
as_FloatRegister($src3$$reg)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || -+ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); -+ match(Set dst (DecodeN src)); ++// -src1 * src2 - src3 ++instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} + -+ ins_cost(0); -+ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ Register s = $src$$Register; -+ Register d = $dst$$Register; -+ __ decode_heap_oop_not_null(d, s); ++ __ fnmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); + %} -+ ins_pipe(ialu_reg); ++ ++ ins_pipe(pipe_class_default); +%} + -+// Convert klass pointer into compressed form. -+instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ -+ match(Set dst (EncodePKlass src)); ++// Math.max(FF)F ++instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MaxF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} ++ format %{ "maxF $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ encode_klass_not_null(dst_reg, src_reg, t0); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, false /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ -+ predicate(!maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); ++// Math.min(FF)F ++instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MinF src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ format %{ "minF $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? 
xheapbase : t0); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ false /* is_double */, true /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_s); +%} + -+instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ -+ predicate(maybe_use_tmp_register_decoding_klass()); -+ -+ match(Set dst (DecodeNKlass src)); -+ -+ effect(TEMP tmp); ++// Math.max(DD)D ++instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MaxD src1 src2)); ++ effect(TEMP_DEF dst); + -+ ins_cost(ALU_COST); -+ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ format %{ "maxD $dst, $src1, $src2" %} + + ins_encode %{ -+ Register src_reg = as_Register($src$$reg); -+ Register dst_reg = as_Register($dst$$reg); -+ Register tmp_reg = as_Register($tmp$$reg); -+ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, false /* is_min */); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(fp_dop_reg_reg_d); +%} + -+// stack <-> reg and reg <-> reg shuffles with no conversion ++// Math.min(DD)D ++instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MinD src1 src2)); ++ effect(TEMP_DEF dst); + -+instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ ++ format %{ "minD $dst, $src1, $src2" %} + -+ match(Set dst (MoveF2I src)); ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg), ++ true /* is_double */, true /* is_min */); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(fp_dop_reg_reg_d); ++%} + -+ ins_cost(LOAD_COST); ++instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (DivF src1 src2)); + -+ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} + + ins_encode %{ -+ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ fdiv_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(fp_div_s); +%} + -+instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(LOAD_COST); ++instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (DivD src1 src2)); + -+ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} + + ins_encode %{ -+ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ __ fdiv_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(fp_div_d); +%} + -+instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ ++instruct negF_reg_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (NegF src)); + -+ match(Set dst (MoveD2L src)); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ __ fneg_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(LOAD_COST); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ++instruct negD_reg_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (NegD src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.d $dst, 
$src, $src\t#@negD_reg_reg" %} + + ins_encode %{ -+ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ __ fneg_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(iload_reg_reg); -+ ++ ins_pipe(fp_uop_d); +%} + -+instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ ++instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (AbsI src)); + -+ match(Set dst (MoveL2D src)); ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "sraiw t0, $src, 0x1f\n\t" ++ "addw $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absI_reg" ++ %} + -+ effect(DEF dst, USE src); ++ ins_encode %{ ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} + -+ ins_cost(LOAD_COST); ++ ins_pipe(ialu_reg_reg); ++%} + -+ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ++instruct absL_reg(iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (AbsL src)); + -+ ins_encode %{ -+ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ ins_cost(ALU_COST * 3); ++ format %{ ++ "srai t0, $src, 0x3f\n\t" ++ "add $dst, $src, t0\n\t" ++ "xorr $dst, $dst, t0\t#@absL_reg" + %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ srai(t0, as_Register($src$$reg), 0x3f); ++ __ add(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ xorr(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} + ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ -+ -+ match(Set dst (MoveF2I src)); ++instruct absF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (AbsF src)); + -+ effect(DEF dst, USE src); ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} ++ ins_encode %{ ++ __ fabs_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(STORE_COST); ++ ins_pipe(fp_uop_s); ++%} + -+ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} ++instruct absD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (AbsD src)); + ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} + ins_encode %{ -+ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ __ fabs_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(fp_uop_d); +%} + -+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); ++instruct sqrtF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (SqrtF src)); + -+ effect(DEF dst, USE src); ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} ++ ins_encode %{ ++ __ fsqrt_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} + -+ ins_cost(STORE_COST); ++ ins_pipe(fp_sqrt_s); ++%} + -+ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ++instruct sqrtD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (SqrtD src)); + ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} + ins_encode %{ -+ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ __ fsqrt_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(fp_sqrt_d); +%} + -+instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); ++// Arithmetic Instructions End + -+ effect(DEF dst, USE src); ++// ============================================================================ ++// Logical Instructions + -+ ins_cost(STORE_COST); ++// Register And ++instruct 
andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_memory); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(STORE_COST); ++// Immediate And ++instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (AndI src1 src2)); + -+ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ++ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(istore_reg_reg); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++// Register Or ++instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ match(Set dst (MoveF2I src)); ++ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} + -+ effect(DEF dst, USE src); ++ ins_pipe(ialu_reg_reg); ++%} + -+ ins_cost(XFER_COST); ++// Immediate Or ++instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (OrI src1 src2)); + -+ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(fp_f2i); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ -+ -+ match(Set dst (MoveI2F src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Register Xor ++instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_i2f); -+ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ -+ -+ match(Set dst (MoveD2L src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Immediate Xor ++instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (XorI src1 src2)); + -+ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); 
+ %} + -+ ins_pipe(fp_d2l); -+ ++ ins_pipe(ialu_reg_imm); +%} + -+instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ -+ -+ match(Set dst (MoveL2D src)); -+ -+ effect(DEF dst, USE src); -+ -+ ins_cost(XFER_COST); ++// Register And Long ++instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(fp_l2d); ++ ins_pipe(ialu_reg_reg); +%} + -+// ============================================================================ -+// Compare Instructions which set the result float comparisons in dest register. -+ -+instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) -+%{ -+ match(Set dst (CmpF3 op1 op2)); ++// Immediate And Long ++instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AndL src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.s $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" -+ %} ++ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. -+ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) -+%{ -+ match(Set dst (CmpD3 op1 op2)); ++// Register Or Long ++instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); -+ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" -+ "bgtz $dst, done\n\t" -+ "feq.d $dst, $op1, $op2\n\t" -+ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" -+ %} ++ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
-+ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) -+%{ -+ match(Set dst (CmpL3 op1 op2)); ++// Immediate Or Long ++instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (OrL src1 src2)); + -+ ins_cost(ALU_COST * 3 + BRANCH_COST); -+ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" -+ "bnez $dst, done\n\t" -+ "slt $dst, $op1, $op2\n\t" -+ "neg $dst, $dst\t#@cmpL3_reg_reg" -+ %} ++ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); -+ __ mv(as_Register($dst$$reg), t0); ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(ialu_reg_imm); +%} + -+instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) -+%{ -+ match(Set dst (CmpLTMask p q)); -+ -+ ins_cost(2 * ALU_COST); ++// Register Xor Long ++instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (XorL src1 src2)); + -+ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" -+ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" -+ %} ++ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} + ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); -+ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} + + ins_pipe(ialu_reg_reg); +%} + -+instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) -+%{ -+ match(Set dst (CmpLTMask op zero)); ++// Immediate Xor Long ++instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (XorL src1 src2)); + + ins_cost(ALU_COST); -+ -+ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} + + ins_encode %{ -+ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(ialu_reg_imm); +%} + -+ +// ============================================================================ -+// Max and Min -+ -+instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MinI src1 src2)); ++// BSWAP Instructions + -+ effect(DEF dst, USE src1, USE src2); ++instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesI src)); ++ effect(TEMP cr); + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@minI_rReg" -+ %} ++ ins_cost(ALU_COST * 13); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); ++ 
__ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) -+%{ -+ match(Set dst (MaxI src1 src2)); -+ -+ effect(DEF dst, USE src1, USE src2); ++instruct bytes_reverse_long(iRegLNoSp dst, iRegL src, rFlagsReg cr) %{ ++ match(Set dst (ReverseBytesL src)); ++ effect(TEMP cr); + -+ ins_cost(BRANCH_COST + ALU_COST * 2); -+ format %{ -+ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" -+ "mv $dst, $src2\n\t" -+ "j Ldone\n\t" -+ "bind Lsrc1\n\t" -+ "mv $dst, $src1\n\t" -+ "bind\t#@maxI_rReg" -+ %} ++ ins_cost(ALU_COST * 29); ++ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} + + ins_encode %{ -+ Label Lsrc1, Ldone; -+ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); -+ __ j(Ldone); -+ __ bind(Lsrc1); -+ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); -+ __ bind(Ldone); -+ ++ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// Branch Instructions -+// Direct Branch. -+instruct branch(label lbl) -+%{ -+ match(Goto); -+ -+ effect(USE lbl); ++instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "j $lbl\t#@branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} + -+ ins_encode(riscv_enc_j(lbl)); ++ ins_encode %{ ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(ialu_reg); +%} + -+// ============================================================================ -+// Compare and Branch Instructions -+ -+// Patterns for short (< 12KiB) variants -+ -+// Compare flags and branch near instructions. -+instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); -+%} + -+// Compare signed int and branch near instructions -+instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_branch'. -+ match(If cmp (CmpI op1 op2)); ++ ins_pipe(ialu_reg); ++%} + -+ effect(USE lbl); ++// ============================================================================ ++// MemBar Instruction + -+ ins_cost(BRANCH_COST); ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} ++ format %{ "#@load_fence" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpI_loop'. 
-+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} ++ format %{ "#@membar_acquire\n\t" ++ "fence ir iorw" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_acquire"); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare unsigned int and branch near instructions -+instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_branch'. -+ match(If cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} ++ format %{ "#@membar_acquire_lock (elided)" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_acquire_lock (elided)"); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) -+%{ -+ // Same match rule as `far_cmpU_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} ++ format %{ "#@store_fence" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare signed long and branch near instructions -+instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_branch'. -+ match(If cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} ++ format %{ "#@membar_release\n\t" ++ "fence iorw ow" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_release"); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpL_loop'. 
-+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ins_cost(ALU_COST); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} ++ format %{ "MEMBAR-store-store\t#@membar_storestore" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ __ membar(MacroAssembler::StoreStore); + %} -+ -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare unsigned long and branch near instructions -+instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_branch'. -+ match(If cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} ++ format %{ "#@membar_release_lock (elided)" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_release_lock (elided)"); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) -+%{ -+ // Same match rule as `far_cmpUL_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ -+ effect(USE lbl); ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(ALU_COST); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} ++ format %{ "#@membar_volatile\n\t" ++ "fence iorw iorw"%} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ block_comment("membar_volatile"); ++ __ membar(MacroAssembler::StoreLoad); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(pipe_serial); +%} + -+// Compare pointer and branch near instructions -+instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_branch'. -+ match(If cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); ++// ============================================================================ ++// Cast Instructions (Java-level type cast) + -+ ins_cost(BRANCH_COST); ++instruct castX2P(iRegPNoSp dst, iRegL src) %{ ++ match(Set dst (CastX2P src)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ // Same match rule as `far_cmpP_loop'. 
-+ match(CountedLoopEnd cmp (CmpP op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct castP2X(iRegLNoSp dst, iRegP src) %{ ++ match(Set dst (CastP2X src)); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare narrow pointer and branch near instructions -+instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++instruct castPP(iRegPNoSp dst) +%{ -+ // Same match rule as `far_cmpN_branch'. -+ match(If cmp (CmpN op1 op2)); -+ -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++ match(Set dst (CastPP dst)); ++ ins_cost(0); + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} ++ size(0); ++ format %{ "# castPP of $dst, #@castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); -+ %} ++instruct castII(iRegI dst) ++%{ ++ match(Set dst (CastII dst)); + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ size(0); ++ format %{ "# castII of $dst, #@castII" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); +%} + -+instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++instruct checkCastPP(iRegPNoSp dst) +%{ -+ // Same match rule as `far_cmpN_loop'. -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ match(Set dst (CheckCastPP dst)); + -+ effect(USE lbl); ++ size(0); ++ ins_cost(0); ++ format %{ "# checkcastPP of $dst, #@checkCastPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} + -+ ins_cost(BRANCH_COST); ++// ============================================================================ ++// Convert Instructions + -+ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} ++// int to bool ++instruct convI2Bool(iRegINoSp dst, iRegI src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convI2Bool" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label)); ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare float and branch near instructions -+instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++// pointer to bool ++instruct convP2Bool(iRegINoSp dst, iRegP src) +%{ -+ // Same match rule as `far_cmpF_branch'. 
-+ match(If cmp (CmpF op1 op2)); -+ -+ effect(USE lbl); ++ match(Set dst (Conv2B src)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convP2Bool" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ // Same match rule as `far_cmpF_loop'. -+ match(CountedLoopEnd cmp (CmpF op1 op2)); -+ effect(USE lbl); ++// int <-> long + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) ++%{ ++ match(Set dst (ConvI2L src)); + ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); + %} -+ -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare double and branch near instructions -+instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) -+%{ -+ // Same match rule as `far_cmpD_branch'. -+ match(If cmp (CmpD op1 op2)); -+ effect(USE lbl); ++instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ++ match(Set dst (ConvL2I src)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++// int to unsigned long (Zero-extend) ++instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) +%{ -+ // Same match rule as `far_cmpD_loop'. -+ match(CountedLoopEnd cmp (CmpD op1 op2)); -+ effect(USE lbl); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+ ins_cost(XFER_COST + BRANCH_COST); -+ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), -+ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); + %} + -+ ins_pipe(pipe_class_compare); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg_shift); +%} + -+// Compare signed int with zero and branch near instructions -+instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_branch'. 
-+ match(If cmp (CmpI op1 zero)); ++// float <-> double + -+ effect(USE op1, USE lbl); ++instruct convF2D_reg(fRegD dst, fRegF src) %{ ++ match(Set dst (ConvF2D src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2d); +%} + -+instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpI_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convD2F_reg(fRegF dst, fRegD src) %{ ++ match(Set dst (ConvD2F src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2f); +%} + -+// Compare unsigned int with zero and branch near instructions -+instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// float <-> int + -+ ins_cost(BRANCH_COST); ++instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2I src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2i); +%} + -+instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} ++instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2F src)); + ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_i2f); +%} + -+// Compare signed long with zero and branch near instructions -+instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_branch'. 
-+ match(If cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// float <-> long + -+ ins_cost(BRANCH_COST); ++instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2L src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_f2l); +%} + -+instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpL_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ++ match(Set dst (ConvL2F src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_l2f); +%} + -+// Compare unsigned long with zero and branch near instructions -+instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. -+ match(If cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); ++// double <-> int + -+ ins_cost(BRANCH_COST); ++instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2I src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2i); +%} + -+instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST); ++instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2D src)); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_i2d); +%} + -+// Compare pointer with zero and branch near instructions -+instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_branch'. 
-+ match(If cmp (CmpP op1 zero)); -+ effect(USE lbl); ++// double <-> long + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} ++instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_d2l); +%} + -+instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP op1 zero)); -+ effect(USE lbl); ++instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ ++ match(Set dst (ConvL2D src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(fp_l2d); +%} + -+// Compare narrow pointer with zero and branch near instructions -+instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_branch'. -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Convert oop into int for vectors alignment masking ++instruct convP2I(iRegINoSp dst, iRegP src) %{ ++ match(Set dst (ConvL2I (CastP2X src))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ zero_extend($dst$$Register, $src$$Register, 32); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ // Same match rule as `far_cmpN_reg_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpN op1 zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(iRegINoSp dst, iRegN src) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); + -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ mv($dst$$Register, $src$$Register); + %} + -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Compare narrow pointer with pointer zero and branch near instructions -+instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. 
-+ match(If cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodeP src)); ++ ins_cost(ALU_COST); ++ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ encode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} + -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} ++instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop(d, s); + %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST); -+ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} ++instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); + ++ ins_cost(0); ++ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop_not_null(d, s); + %} -+ -+ ins_pipe(pipe_cmpz_branch); -+ ins_short_branch(1); ++ ins_pipe(ialu_reg); +%} + -+// Patterns for far (20KiB) variants -+ -+instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ -+ match(If cmp cr); -+ effect(USE lbl); ++// Convert klass pointer into compressed form. 
++instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodePKlass src)); + -+ ins_cost(BRANCH_COST); -+ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ ins_cost(ALU_COST); ++ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ encode_klass_not_null(dst_reg, src_reg, t0); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg); +%} + -+// Compare signed int and branch far instructions -+instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpI op1 op2)); -+ effect(USE lbl); ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src, iRegPNoSp tmp) %{ ++ match(Set dst (DecodeNKlass src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(TEMP tmp); + -+ // the format instruction [far_b$cmp] here is be used as two insructions -+ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ Register tmp_reg = as_Register($tmp$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(ialu_reg); +%} + -+instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpI op1 op2)); -+ effect(USE lbl); ++// stack <-> reg and reg <-> reg shuffles with no conversion + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ + -+ ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); -+ %} ++ match(Set dst (MoveF2I src)); + -+ ins_pipe(pipe_cmp_branch); -+%} ++ effect(DEF dst, USE src); + -+instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(If cmp (CmpU op1 op2)); -+ effect(USE lbl); ++ ins_cost(LOAD_COST); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpU op1 op2)); -+ effect(USE lbl); ++instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ 
flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(iload_reg_reg); ++ +%} + -+instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpL op1 op2)); -+ effect(USE lbl); ++instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(If cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpUL op1 op2)); -+ effect(USE lbl); ++instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(istore_reg_reg); ++ +%} + -+instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(If cmp (CmpP op1 op2)); ++instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveD2L src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ 
ins_cost(STORE_COST); ++ ++ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(pipe_class_memory); ++ +%} + -+instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpP op1 op2)); ++instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveL2D src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ ins_cost(STORE_COST); ++ ++ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ Assembler::CompressibleRegion cr(&_masm); ++ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(istore_reg_reg); ++ +%} + -+instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(If cmp (CmpN op1 op2)); ++instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveF2I src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_f2i); ++ +%} + -+instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpN op1 op2)); ++instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveI2F src)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(DEF dst, USE src); + -+ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), -+ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(pipe_cmp_branch); ++ ins_pipe(fp_i2f); ++ +%} + -+// Float compare and branch instructions -+instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) -+%{ -+ match(If cmp (CmpF op1 op2)); ++instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ + -+ effect(USE lbl); ++ match(Set dst (MoveD2L src)); + -+ ins_cost(XFER_COST + BRANCH_COST * 2); -+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} + + ins_encode %{ -+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -+ *($lbl$$label), /* is_far */ true); ++ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(fp_d2l); ++ +%} + -+instruct far_cmpF_loop(cmpOp cmp, fRegF 
op1, fRegF op2, label lbl)
-+%{
-+ match(CountedLoopEnd cmp (CmpF op1 op2));
-+ effect(USE lbl);
++instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%}
++ match(Set dst (MoveL2D src));
++
++ effect(DEF dst, USE src);
++
++ ins_cost(XFER_COST);
++
++ format %{ "fmv.d.x $dst, $src\t#@MoveL2D_reg_reg" %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg),
-+ *($lbl$$label), /* is_far */ true);
++ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg));
+ %}
+
-+ ins_pipe(pipe_class_compare);
++ ins_pipe(fp_l2d);
+%}
+
-+// Double compare and branch instructions
-+instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
++// ============================================================================
++// Compare Instructions which set the result of float comparisons in dest register.
++
++instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2)
+%{
-+ match(If cmp (CmpD op1 op2));
-+ effect(USE lbl);
++ match(Set dst (CmpF3 op1 op2));
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%}
++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
++ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t"
++ "bgtz $dst, done\n\t"
++ "feq.s $dst, $op1, $op2\n\t"
++ "addi $dst, $dst, -1\t#@cmpF3_reg_reg"
++ %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
-+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
++ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg),
++ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/);
+ %}
+
-+ ins_pipe(pipe_class_compare);
++ ins_pipe(pipe_class_default);
+%}
+
-+instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl)
++instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2)
+%{
-+ match(CountedLoopEnd cmp (CmpD op1 op2));
-+ effect(USE lbl);
++ match(Set dst (CmpD3 op1 op2));
+
-+ ins_cost(XFER_COST + BRANCH_COST * 2);
-+ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%}
++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST);
++ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t"
++ "bgtz $dst, done\n\t"
++ "feq.d $dst, $op1, $op2\n\t"
++ "addi $dst, $dst, -1\t#@cmpD3_reg_reg"
++ %}
+
+ ins_encode %{
-+ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg),
-+ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true);
++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than.
++ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); + %} + -+ ins_pipe(pipe_class_compare); ++ ins_pipe(pipe_class_default); +%} + -+instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) +%{ -+ match(If cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ match(Set dst (CmpL3 op1 op2)); + ++ ins_cost(ALU_COST * 3 + BRANCH_COST); ++ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" ++ "bnez $dst, done\n\t" ++ "slt $dst, $op1, $op2\n\t" ++ "neg $dst, $dst\t#@cmpL3_reg_reg" ++ %} + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); ++ __ mv(as_Register($dst$$reg), t0); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_class_default); +%} + -+instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) +%{ -+ match(CountedLoopEnd cmp (CmpI op1 zero)); -+ -+ effect(USE op1, USE lbl); ++ match(Set dst (CmpLTMask p q)); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(2 * ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} ++ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" ++ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" ++ %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); ++ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) +%{ -+ match(If cmp (CmpU op1 zero)); -+ -+ effect(USE op1, USE lbl); ++ match(Set dst (CmpLTMask op zero)); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(ALU_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) -+%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ effect(USE op1, USE lbl); ++// ============================================================================ ++// Max and Min + -+ ins_cost(BRANCH_COST * 2); ++instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MinI src1 src2)); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++ effect(DEF dst, USE src1, USE src2); + ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@minI_rReg" ++ %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ Label Lsrc1, Ldone; ++ __ ble(as_Register($src1$$reg), 
as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) +%{ -+ match(If cmp (CmpU op1 zero)); ++ match(Set dst (MaxI src1 src2)); + -+ effect(USE op1, USE lbl); ++ effect(DEF dst, USE src1, USE src2); + -+ ins_cost(BRANCH_COST); ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@maxI_rReg" ++ %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++// ============================================================================ ++// Branch Instructions ++// Direct Branch. ++instruct branch(label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ match(Goto); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); ++ format %{ "j $lbl\t#@branch" %} + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} -+ -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode(riscv_enc_j(lbl)); + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_branch); +%} + -+instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) -+%{ -+ match(If cmp (CmpL op1 zero)); ++// ============================================================================ ++// Compare and Branch Instructions + -+ effect(USE op1, USE lbl); ++// Patterns for short (< 12KiB) variants + -+ ins_cost(BRANCH_COST * 2); ++// Compare flags and branch near instructions. ++instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); + %} -+ + ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++// Compare signed int and branch near instructions ++instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ // Same match rule as `far_cmpI_branch'. 
++ match(If cmp (CmpI op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} + + ins_encode %{ -+ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(If cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpI_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++// Compare unsigned int and branch near instructions ++instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpU_branch'. ++ match(If cmp (CmpU op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} + + ins_encode %{ -+ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+// compare lt/ge unsigned instructs has no short instruct with same match -+instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) +%{ -+ match(If cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpU_loop'. 
++ match(CountedLoopEnd cmp (CmpU op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++// Compare signed long and branch near instructions ++instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) +%{ -+ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ // Same match rule as `far_cmpL_branch'. ++ match(If cmp (CmpL op1 op2)); + -+ effect(USE op1, USE lbl); ++ effect(USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} + -+ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP op1 zero)); ++instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP op1 zero)); ++// Compare unsigned long and branch near instructions ++instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_branch'. ++ match(If cmp (CmpUL op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(If cmp (CmpN op1 zero)); -+ effect(USE lbl); ++instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_loop'. 
++ match(CountedLoopEnd cmp (CmpUL op1 op2)); + -+ ins_cost(BRANCH_COST * 2); ++ effect(USE lbl); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpN op1 zero)); ++// Compare pointer and branch near instructions ++instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_branch'. ++ match(If cmp (CmpP op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); ++ ins_cost(BRANCH_COST); + -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(If cmp (CmpP (DecodeN op1) zero)); ++instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ + effect(USE lbl); + -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} + + ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_cmpz_branch); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ -+ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); -+ effect(USE lbl); -+ -+ ins_cost(BRANCH_COST * 2); -+ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} -+ -+ ins_encode %{ -+ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); -+ %} ++// Compare narrow pointer and branch near instructions ++instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_branch'. 
++ match(If cmp (CmpN op1 op2)); + -+ ins_pipe(pipe_cmpz_branch); -+%} ++ effect(USE lbl); + -+// ============================================================================ -+// Conditional Move Instructions -+instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++ ins_cost(BRANCH_COST); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++// Compare float and branch near instructions ++instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_branch'. ++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2, $lbl \t#@cmpF_branch"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); -+ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_loop'. 
++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++// Compare double and branch near instructions ++instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_branch'. ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ -+ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); -+ ins_cost(ALU_COST + BRANCH_COST); ++instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_loop'. ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" -+ "mv $dst, $src\n\t" -+ "skip:" -+ %} ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} + + ins_encode %{ -+ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, -+ as_Register($op1$$reg), as_Register($op2$$reg), -+ as_Register($dst$$reg), as_Register($src$$reg)); ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); + %} + -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); +%} + -+ -+// ============================================================================ -+// Procedure Call/Return Instructions -+ -+// Call Java Static Instruction -+ -+instruct CallStaticJavaDirect(method meth) ++// Compare signed int with zero and branch near instructions ++instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallStaticJava); ++ // Same match rule as `far_cmpI_reg_imm0_branch'. 
++ match(If cmp (CmpI op1 zero)); + -+ effect(USE meth); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} + -+ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} -+ -+ ins_encode( riscv_enc_java_static_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// TO HERE -+ -+// Call Java Dynamic Instruction -+instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallDynamicJava); ++ // Same match rule as `far_cmpI_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + -+ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ins_cost(BRANCH_COST); + -+ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} + -+ ins_encode( riscv_enc_java_dynamic_call(meth), -+ riscv_enc_call_epilog ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Call Runtime Instruction -+ -+instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++// Compare unsigned int with zero and branch near instructions ++instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallRuntime); ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpU op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// Call Runtime Instruction -+ -+instruct CallLeafDirect(method meth, rFlagsReg cr) ++instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) +%{ -+ match(CallLeaf); ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); + -+ ins_pipe(pipe_class_call); -+%} ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} + -+// Call Runtime Instruction ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} + -+instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++// Compare signed long with zero and branch near instructions ++instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(CallLeafNoFP); ++ // Same match rule as `far_cmpL_reg_imm0_branch'. 
++ match(If cmp (CmpL op1 zero)); + -+ effect(USE meth, KILL cr); ++ effect(USE op1, USE lbl); + + ins_cost(BRANCH_COST); + -+ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} + -+ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+// ============================================================================ -+// Partial Subtype Check -+// -+// superklass array for an instance of the superklass. Set a hidden -+// internal cache on a hit (cache is checked with exposed code in -+// gen_subtype_check()). Return zero for a hit. The encoding -+// ALSO sets flags. -+ -+instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) ++instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(Set result (PartialSubtypeCheck sub super)); -+ effect(KILL temp, KILL cr); ++ // Same match rule as `far_cmpL_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ effect(USE op1, USE lbl); + -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ins_cost(BRANCH_COST); + -+ opcode(0x1); // Force zero of result reg on hit ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} + -+ ins_pipe(pipe_class_memory); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, -+ immP0 zero, rFlagsReg cr) ++// Compare unsigned long with zero and branch near instructions ++instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); -+ effect(KILL temp, KILL result); -+ -+ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); -+ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} -+ -+ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpUL op1 zero)); + -+ opcode(0x0); // Don't zero result reg on hit ++ effect(USE op1, USE lbl); + -+ ins_pipe(pipe_class_memory); -+%} ++ ins_cost(BRANCH_COST); + -+instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UU); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) +%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} -+ ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ effect(USE op1, USE lbl); + -+instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::UL); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, -+ rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++// Compare pointer with zero and branch near instructions ++instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_branch'. 
++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, -+ StrIntrinsicNode::LU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++// Compare narrow pointer with zero and branch near instructions ++instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_branch'. 
++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::LL); -+ %} -+ ins_pipe(pipe_class_memory); -+%} ++ ins_cost(BRANCH_COST); + -+instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, -+ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} + + ins_encode %{ -+ __ string_indexof($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ $tmp5$$Register, $tmp6$$Register, -+ $result$$Register, StrIntrinsicNode::UL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++// Compare narrow pointer with pointer zero and branch near instructions ++instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, -+ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); -+ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} + + ins_encode %{ -+ int icnt2 = (int)$int_cnt2$$constant; -+ __ string_indexof_linearscan($str1$$Register, $str2$$Register, -+ $cnt1$$Register, zr, -+ $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, -+ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); +%} + -+instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++// Patterns for far (20KiB) variants + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmpz_branch); +%} + ++// Compare signed int and branch far instructions ++instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpI op1 op2)); ++ effect(USE lbl); + -+instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, -+ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, -+ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) -+%{ -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); -+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, -+ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ // the format instruction [far_b$cmp] here is be used as two insructions ++ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} + + ins_encode %{ -+ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, -+ $result$$Register, $tmp1$$Register, $tmp2$$Register, -+ $tmp3$$Register, $tmp4$$Register, true /* isL */); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// clearing of an array -+instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ effect(USE lbl); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ 
"ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} + + ins_encode %{ -+ address tpc = __ zero_words($base$$Register, $cnt$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() -+ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL base, KILL cr); ++instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpU op1 op2)); ++ effect(USE lbl); + -+ ins_cost(4 * DEFAULT_COST); -+ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} + + ins_encode %{ -+ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct far_cmpLloop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, -+ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) -+%{ -+ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++instruct far_cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, -+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, -+ $result$$Register, $tmp$$Register, 2); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// ============================================================================ -+// Safepoint Instructions ++instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ effect(USE lbl); + -+instruct safePoint(iRegP poll) -+%{ -+ match(SafePoint poll); ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} + -+ ins_cost(2 * LOAD_COST); -+ format %{ -+ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" -+ %} + ins_encode %{ -+ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); + %} -+ ins_pipe(pipe_serial); // 
ins_pipe(iload_reg_mem); ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// ============================================================================ -+// This name is KNOWN by the ADLC and cannot be changed. -+// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -+// for this guy. -+instruct tlsLoadP(javaThread_RegP dst) ++instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) +%{ -+ match(Set dst (ThreadLocal)); ++ match(If cmp (CmpP op1 op2)); + -+ ins_cost(0); ++ effect(USE lbl); + -+ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ ins_cost(BRANCH_COST * 2); + -+ size(0); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} + -+ ins_encode( /*empty*/ ); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_cmp_branch); +%} + -+// inlined locking and unlocking -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) +%{ -+ match(Set cr (FastLock object box)); -+ effect(TEMP tmp, TEMP tmp2); -+ -+ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); -+ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} -+ -+ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ match(CountedLoopEnd cmp (CmpP op1 op2)); + -+ ins_pipe(pipe_serial); -+%} ++ effect(USE lbl); + -+// using t1 as the 'flag' register to bridge the BoolNode producers and consumers -+instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) -+%{ -+ match(Set cr (FastUnlock object box)); -+ effect(TEMP tmp, TEMP tmp2); ++ ins_cost(BRANCH_COST * 2); + -+ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); -+ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} + -+ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_serial); ++ ins_pipe(pipe_cmp_branch); +%} + -+// Tail Call; Jump from runtime stub to Java code. -+// Also known as an 'interprocedural jump'. -+// Target of jump will eventually return to caller. -+// TailJump below removes the return address. -+instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) +%{ -+ match(TailCall jump_target method_oop); ++ match(If cmp (CmpN op1 op2)); + -+ ins_cost(BRANCH_COST); ++ effect(USE lbl); + -+ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." 
%} ++ ins_cost(BRANCH_COST * 2); + -+ ins_encode(riscv_enc_tail_call(jump_target)); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} + -+ ins_pipe(pipe_class_call); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) +%{ -+ match(TailJump jump_target ex_oop); ++ match(CountedLoopEnd cmp (CmpN op1 op2)); + -+ ins_cost(ALU_COST + BRANCH_COST); ++ effect(USE lbl); + -+ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ ins_cost(BRANCH_COST * 2); + -+ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} + -+ ins_pipe(pipe_class_call); ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); +%} + -+// Create exception oop: created by stack-crawling runtime code. -+// Created exception is now available to this handler, and is setup -+// just prior to jumping to this handler. No code emitted. -+instruct CreateException(iRegP_R10 ex_oop) ++// Float compare and branch instructions ++instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) +%{ -+ match(Set ex_oop (CreateEx)); ++ match(If cmp (CmpF op1 op2)); + -+ ins_cost(0); -+ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ effect(USE lbl); + -+ size(0); ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} + -+ ins_encode( /*empty*/ ); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_empty); ++ ins_pipe(pipe_class_compare); +%} + -+// Rethrow exception: The exception oop will come in the first -+// argument position. Then JUMP (not call) to the rethrow stub code. 
-+instruct RethrowException() ++instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) +%{ -+ match(Rethrow); -+ -+ ins_cost(BRANCH_COST); ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); + -+ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} + -+ ins_encode( riscv_enc_rethrow() ); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_class_call); ++ ins_pipe(pipe_class_compare); +%} + -+// Return Instruction -+// epilog node loads ret address into ra as part of frame pop -+instruct Ret() ++// Double compare and branch instructions ++instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) +%{ -+ match(Return); ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ ins_cost(BRANCH_COST); -+ format %{ "ret\t// return register, #@Ret" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} + -+ ins_encode(riscv_enc_ret()); ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} + -+ ins_pipe(pipe_branch); ++ ins_pipe(pipe_class_compare); +%} + -+// Die now. -+instruct ShouldNotReachHere() %{ -+ match(Halt); -+ -+ ins_cost(BRANCH_COST); ++instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); + -+ format %{ "#@ShouldNotReachHere" %} ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} + + ins_encode %{ -+ if (is_reachable()) { -+ __ halt(); -+ } ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(pipe_class_default); ++ ins_pipe(pipe_class_compare); +%} + ++instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpI op1 zero)); + -+//----------PEEPHOLE RULES----------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+// -+// peepmatch ( root_instr_name [preceding_instruction]* ); -+// -+// peepconstraint %{ -+// (instruction_number.operand_name relational_op instruction_number.operand_name -+// [, ...] ); -+// // instruction numbers are zero-based using left to right order in peepmatch -+// -+// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -+// // provide an instruction_number.operand_name for each operand that appears -+// // in the replacement instruction's match rule -+// -+// ---------VM FLAGS--------------------------------------------------------- -+// -+// All peephole optimizations can be turned off using -XX:-OptoPeephole -+// -+// Each peephole rule is given an identifying number starting with zero and -+// increasing by one in the order seen by the parser. An individual peephole -+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -+// on the command-line. 
-+// -+// ---------CURRENT LIMITATIONS---------------------------------------------- -+// -+// Only match adjacent instructions in same basic block -+// Only equality constraints -+// Only constraints between operands, not (0.dest_reg == RAX_enc) -+// Only one replacement instruction -+// -+//----------SMARTSPILL RULES--------------------------------------------------- -+// These must follow all instruction definitions as they use the names -+// defined in the instructions definitions. -+ -+// Local Variables: -+// mode: c++ -+// End: -diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad -new file mode 100644 -index 000000000..6f7055a39 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_b.ad -@@ -0,0 +1,605 @@ -+// -+// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// -+// ++ effect(USE op1, USE lbl); + -+// RISCV Bit-Manipulation Extension Architecture Description File ++ ins_cost(BRANCH_COST * 2); + -+instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ -+ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); -+ effect(DEF dst, USE src); -+ -+ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ -+ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); -+ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); -+ effect(DEF dst, USE src); -+ -+ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpI op1 zero)); + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); -+ %} ++ effect(USE op1, USE lbl); + -+ ins_pipe(ialu_reg_shift); -+%} ++ ins_cost(BRANCH_COST * 2); + -+// ror expander -+instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} + -+ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// ror expander -+instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); + -+ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE op1, USE lbl); + ++ ins_cost(BRANCH_COST * 2); + -+instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} + -+ expand %{ -+ rorI_reg_b(dst, src, shift); ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} -+%} -+ -+instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); + -+ expand %{ -+ rorI_reg_b(dst, src, shift); -+ %} ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); ++instruct 
far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE op1, USE lbl); + -+instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); ++ ins_cost(BRANCH_COST * 2); + -+ expand %{ -+ rorL_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} + -+// rol expander -+instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); + -+ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// rol expander -+instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ -+ effect(DEF dst, USE src, USE shift); ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); + -+ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); -+ %} -+ -+ ins_pipe(ialu_reg_reg); -+%} ++ effect(USE op1, USE lbl); + -+instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); ++ ins_cost(BRANCH_COST); + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} + -+instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ expand %{ -+ rolI_reg_b(dst, src, shift); -+ %} ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); ++instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ effect(USE op1, USE lbl); + -+instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); ++ ins_cost(BRANCH_COST); + -+ expand %{ -+ rolL_reg_b(dst, src, shift); -+ %} -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} + -+// Convert oop into int for vectors alignment masking -+instruct convP2I_b(iRegINoSp dst, iRegP src) %{ -+ predicate(UseZba); -+ match(Set dst (ConvL2I (CastP2X src))); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ ins_pipe(pipe_cmpz_branch); ++%} + -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct 
far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+// byte to int -+instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// int to short -+instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ -+ predicate(UseZbb); -+ match(Set dst (RShiftI (LShiftI src lshift) rshift)); -+ -+ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} -+ -+ ins_cost(ALU_COST); -+ ins_encode %{ -+ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+// short to unsigned int -+instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src mask)); ++ ins_cost(BRANCH_COST * 2); + -+ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// int to unsigned long (zero extend) -+instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ -+ predicate(UseZba); -+ match(Set dst (AndL (ConvI2L src) mask)); ++instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); + -+ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} + -+ ins_cost(ALU_COST); + ins_encode %{ -+ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_shift); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// BSWAP instructions -+instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesI src)); ++instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} + + ins_encode %{ -+ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ 
ins_pipe(pipe_cmpz_branch); +%} + -+instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesL src)); ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); + -+ ins_cost(ALU_COST); -+ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ effect(USE op1, USE lbl); + -+ ins_encode %{ -+ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++ ins_cost(BRANCH_COST); + -+ ins_pipe(ialu_reg); -+%} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} + -+instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesUS src)); ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ ins_pipe(pipe_cmpz_branch); ++%} + -+ ins_encode %{ -+ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); + -+ ins_pipe(ialu_reg); -+%} ++ effect(USE op1, USE lbl); + -+instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (ReverseBytesS src)); ++ ins_cost(BRANCH_COST); + -+ ins_cost(ALU_COST * 2); -+ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} + -+ ins_encode %{ -+ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); -+ %} ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Shift Add Pointer -+instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL src2 imm))); ++instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Shift Add Long -+instruct 
shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL src2 imm))); ++instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ -+ predicate(UseZba); -+ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} + + ins_encode %{ -+ __ shadd(as_Register($dst$$reg), -+ as_Register($src2$$reg), -+ as_Register($src1$$reg), -+ t0, -+ $imm$$constant); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+// Zeros Count instructions -+instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosI src)); ++instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} + + ins_encode %{ -+ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountLeadingZerosL src)); ++instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); + -+ ins_cost(ALU_COST); -+ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} + + ins_encode %{ -+ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_cmpz_branch); +%} + -+instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosI src)); ++// ============================================================================ ++// Conditional Move Instructions ++instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); 
+ -+ ins_cost(ALU_COST); -+ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpI\n\t" ++ %} + + ins_encode %{ -+ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (CountTrailingZerosL src)); ++instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpU\n\t" ++ %} + + ins_encode %{ -+ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Population Count instructions -+instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountI src)); ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpL\n\t" ++ %} + + ins_encode %{ -+ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Note: Long/bitCount(long) returns an int. 
-+instruct popCountL_b(iRegINoSp dst, iRegL src) %{ -+ predicate(UsePopCountInstruction); -+ match(Set dst (PopCountL src)); ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovI_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Max and Min -+instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MinI src1 src2)); ++instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpL\n\t" ++ %} + + ins_encode %{ -+ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ -+ predicate(UseZbb); -+ match(Set dst (MaxI src1 src2)); ++instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST); -+ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ format %{ ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpUL\n\t" ++ %} + + ins_encode %{ -+ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// Abs -+instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsI src)); ++instruct cmovL_cmpI(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ ins_cost(ALU_COST * 2); + format %{ -+ "negw t0, $src\n\t" -+ "max $dst, $src, t0\t#@absI_reg_b" ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpI\n\t" + %} + + ins_encode %{ -+ __ negw(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ -+ predicate(UseZbb); -+ match(Set dst (AbsL src)); ++instruct cmovL_cmpU(iRegLNoSp dst, iRegL src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); + -+ 
ins_cost(ALU_COST * 2); + format %{ -+ "neg t0, $src\n\t" -+ "max $dst, $src, t0\t#@absL_reg_b" ++ "CMove $dst, ($op1 $cop $op2), $dst, $src\t#@cmovL_cmpU\n\t" + %} + + ins_encode %{ -+ __ neg(t0, as_Register($src$$reg)); -+ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); + %} + -+ ins_pipe(ialu_reg_reg); ++ ins_pipe(pipe_class_compare); +%} + -+// And Not -+instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndI src1 (XorI src2 m1))); ++// ============================================================================ ++// Procedure Call/Return Instructions + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) ++%{ ++ match(CallStaticJava); + -+ ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (AndL src1 (XorL src2 m1))); ++// TO HERE + -+ ins_cost(ALU_COST); -+ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
++instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallDynamicJava); + -+ ins_encode %{ -+ __ andn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ++ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ ++ ins_encode(riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog); ++ ++ ins_pipe(pipe_class_call); ++ ins_alignment(4); +%} + -+// Or Not -+instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrI src1 (XorI src2 m1))); ++// Call Runtime Instruction + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallRuntime); + -+ ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ -+ predicate(UseZbb); -+ match(Set dst (OrL src1 (XorL src2 m1))); ++// Call Runtime Instruction + -+ ins_cost(ALU_COST); -+ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++instruct CallLeafDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeaf); + -+ ins_encode %{ -+ __ orn(as_Register($dst$$reg), -+ as_Register($src1$$reg), -+ as_Register($src2$$reg)); -+ %} ++ effect(USE meth, KILL cr); + -+ ins_pipe(ialu_reg_reg); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); +%} -diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad -new file mode 100644 -index 000000000..905041890 ---- /dev/null -+++ b/src/hotspot/cpu/riscv/riscv_v.ad -@@ -0,0 +1,1723 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. 
-+// ++ ++// Call Runtime Instruction ++ ++instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeafNoFP); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ++ ins_encode(riscv_enc_java_to_runtime(meth)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// ============================================================================ ++// Partial Subtype Check +// ++// superklass array for an instance of the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). Return zero for a hit. The encoding ++// ALSO sets flags. ++ ++instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, rFlagsReg cr) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp, KILL cr); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} + -+// RISCV VEC Architecture Description File ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + -+opclass vmemA(indirect); ++ opcode(0x1); // Force zero of result reg on hit + -+source_hpp %{ -+ bool op_vec_supported(int opcode); ++ ins_pipe(pipe_class_memory); +%} + -+source %{ ++instruct partialSubtypeCheckVsZero(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 tmp, ++ immP0 zero, rFlagsReg cr) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL tmp, KILL result); + -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} + -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, tmp, result)); + -+ static void loadStore(MacroAssembler masm, bool is_store, -+ VectorRegister reg, BasicType bt, Register base) { -+ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); -+ masm.vsetvli(t0, x0, sew); -+ if (is_store) { -+ masm.vsex_v(reg, base, sew); -+ } else { -+ masm.vlex_v(reg, base, sew); -+ } -+ } -+ -+ bool op_vec_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: -+ case Op_ExtractI: -+ case Op_ExtractL: -+ case Op_ExtractS: -+ case Op_ExtractUB: -+ return false; -+ default: -+ return UseRVV; -+ } -+ } ++ opcode(0x0); // Don't zero result reg on hit + ++ ins_pipe(pipe_class_memory); +%} + -+definitions %{ -+ int_def VEC_COST (200, 200); ++instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL 
tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); +%} + -+// All VEC instructions ++instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, ++ rFlagsReg cr) ++%{ ++ predicate(((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); + -+// vector load/store -+instruct loadV(vReg dst, vmemA mem) %{ -+ match(Set dst (LoadVector mem)); -+ ins_cost(VEC_COST); -+ format %{ "vle $dst, $mem\t#@loadV" %} ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ VectorRegister dst_reg = as_VectorRegister($dst$$reg); -+ loadStore(MacroAssembler(&cbuf), false, dst_reg, -+ vector_element_basic_type(this), as_Register($mem$$base)); ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct storeV(vReg src, vmemA mem) %{ -+ match(Set mem (StoreVector mem src)); -+ ins_cost(VEC_COST); -+ format %{ "vse $src, $mem\t#@storeV" %} ++instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, 
iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} + ins_encode %{ -+ VectorRegister src_reg = as_VectorRegister($src$$reg); -+ loadStore(MacroAssembler(&cbuf), true, src_reg, -+ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector abs ++instruct string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); + -+instruct vabsB(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVB src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" -+ "vmax.vv $dst, $tmp, $src" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vabsS(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVS src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} + -+instruct vabsI(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVI src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ 
format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" -+ "vmax.vv $dst, $tmp, $src" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vabsL(vReg dst, vReg src, vReg tmp) %{ -+ match(Set dst (AbsVL src)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" -+ "vmax.vv $dst, $tmp, $src" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); -+ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vabsF(vReg dst, vReg src) %{ -+ match(Set dst (AbsVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} + -+instruct vabsD(vReg dst, vReg src) %{ -+ match(Set dst (AbsVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector add ++instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), 
-+ as_VectorRegister($src2$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); + -+instruct vaddF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, false /* isU */); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vaddD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AddVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} -+ ins_encode %{ -+ __ 
vsetvli(t0, x0, Assembler::e64); -+ __ vfadd_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} + -+// vector and ++// clearing of an array ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy) ++%{ ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} + -+instruct vand(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (AndV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vand_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ address tpc = __ zero_words($base$$Register, $cnt$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector or ++instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate((uint64_t)n->in(2)->get_long() < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} + -+instruct vor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (OrV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_memory); +%} + -+// vector xor ++instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vxor(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (XorV src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vxor_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector float div ++instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); + -+instruct vdivF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+instruct vdivD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (DivVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfdiv_vv(as_VectorRegister($dst$$reg), -+ as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 1); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// vector fmla ++instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp5, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp5, KILL cr); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp5" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp5$$Register, 2); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_memory); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); -+ ins_cost(VEC_COST); 
-+ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// ============================================================================ ++// Safepoint Instructions + -+// vector fmls ++instruct safePoint(iRegP poll) ++%{ ++ match(SafePoint poll); + -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ ins_cost(2 * LOAD_COST); ++ format %{ ++ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" + %} -+ ins_pipe(pipe_slow); -+%} -+ -+// dst_src1 = dst_src1 + -src2 * src3 -+// dst_src1 = dst_src1 + src2 * -src3 -+instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); + %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); +%} + -+// vector fnmla ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. 
++instruct tlsLoadP(javaThread_RegP dst) ++%{ ++ match(Set dst (ThreadLocal)); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(0); + -+// dst_src1 = -dst_src1 + -src2 * src3 -+// dst_src1 = -dst_src1 + src2 * -src3 -+instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); -+ ins_cost(VEC_COST); -+ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} + -+// vector fnmls ++ size(0); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode( /*empty*/ ); + -+// dst_src1 = -dst_src1 + src2 * src3 -+instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ -+ predicate(UseFMA); -+ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_empty); +%} + -+// vector mla ++// inlined locking and unlocking ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp1, TEMP tmp2); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ format %{ "fastlock $object,$box\t! 
kills $tmp1,$tmp2, #@cmpFastLock" %} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_fast_lock(object, box, tmp1, tmp2)); + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+// dst_src1 = dst_src1 + src2 * src3 -+instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp1, TEMP tmp2); + -+// vector mls ++ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ format %{ "fastunlock $object,$box\t! kills $tmp1, $tmp2, #@cmpFastUnlock" %} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp1, tmp2)); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_serial); +%} + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. 
++instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); + -+// dst_src1 = dst_src1 - src2 * src3 -+instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ -+ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); -+ ins_cost(VEC_COST); -+ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), -+ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(BRANCH_COST); + -+// vector mul ++ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} + -+instruct vmulB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_encode(riscv_enc_tail_call(jump_target)); + -+instruct vmulS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_call); +%} + -+instruct vmulI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); + -+instruct vmulL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST + BRANCH_COST); + -+instruct vmulF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} + -+instruct vmulD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (MulVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ++ ins_pipe(pipe_class_call); +%} + -+// vector fneg ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException(iRegP_R10 ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); + -+instruct vnegF(vReg dst, vReg src) %{ -+ match(Set dst (NegVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(0); ++ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} + -+instruct vnegD(vReg dst, vReg src) %{ -+ match(Set dst (NegVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ size(0); + -+// popcount vector ++ ins_encode( /*empty*/ ); + -+instruct vpopcountI(iRegINoSp dst, vReg src) %{ -+ match(Set dst (PopCountVI src)); -+ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(pipe_class_empty); +%} + -+// vector add reduction ++// Rethrow exception: The exception oop will come in the first ++// argument position. Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); + -+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ++ ins_encode(riscv_enc_rethrow()); ++ ++ ins_pipe(pipe_class_call); +%} + -+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++// Return Instruction ++// epilog node loads ret address into ra as part of frame pop ++instruct Ret() ++%{ ++ match(Return); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "ret\t// return register, #@Ret" %} ++ ++ ins_encode(riscv_enc_ret()); ++ ++ ins_pipe(pipe_branch); +%} + -+instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (AddReductionVI src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" -+ 
"vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++// Die now. ++instruct ShouldNotReachHere() %{ ++ match(Halt); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "#@ShouldNotReachHere" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ Assembler::CompressibleRegion cr(&_masm); ++ if (is_reachable()) { ++ __ halt(); ++ } + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(pipe_class_default); +%} + -+instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ -+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (AddReductionVL src1 src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" -+ "vredsum.vs $tmp, $src2, $tmp\n\t" -+ "vmv.x.s $dst, $tmp" %} ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ ++// Local Variables: ++// mode: c++ ++// End: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +new file mode 100644 +index 0000000000..7dda004cd3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -0,0 +1,466 @@ ++// ++// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// RISCV Bit-Manipulation Extension Architecture Description File ++ ++// Convert oop into int for vectors alignment masking ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); -+ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVF src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++// byte to int ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ -+ match(Set src1_dst (AddReductionVD src1_dst src2)); -+ effect(TEMP tmp); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" -+ "vfredosum.vs $tmp, $src2, $tmp\n\t" -+ "vfmv.f.s $src1_dst, $tmp" %} ++// int to short ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); -+ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), -+ as_VectorRegister($tmp$$reg)); -+ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector replicate ++// short to unsigned int ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src mask)); ++ ++ format %{ "zext.h $dst, $src\t# 
s2ui, #@convS2UI_reg_reg_b" %} + -+instruct replicateB(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateB src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateS(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateS src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct replicateI(vReg dst, iRegIorL2I src) %{ -+ match(Set dst (ReplicateI src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// int to unsigned long (zero extend) ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); ++ match(Set dst (AndL (ConvI2L src) mask)); + -+instruct replicateL(vReg dst, iRegL src) %{ -+ match(Set dst (ReplicateL src)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} + -+instruct replicateB_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateB con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++ ins_cost(ALU_COST); + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateS_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateS con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_shift); +%} + -+instruct replicateI_imm5(vReg dst, immI5 con) %{ -+ match(Set dst (ReplicateI con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// BSWAP instructions ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesI src)); + -+instruct replicateL_imm5(vReg dst, immL5 con) %{ -+ match(Set dst (ReplicateL con)); -+ ins_cost(VEC_COST); -+ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} + -+instruct replicateF(vReg dst, fRegF src) %{ -+ match(Set dst (ReplicateF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ 
vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct replicateD(vReg dst, fRegD src) %{ -+ match(Set dst (ReplicateD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+// vector shift -+ -+instruct vasrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" -+ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerByte - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesL src)); + -+instruct vasrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" -+ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ BitsPerShort - 1, Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} + -+instruct vasrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ rev8(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vlslB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect( TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ 
-+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesUS src)); + -+instruct vlslS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} + -+instruct vlslI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vlslL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlsrB(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ // if shift > BitsPerByte - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + 
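++// The match rules below rely on the Zba/Zbb bit-manipulation extensions and are
++// guarded by UseZba/UseZbb (or UsePopCountInstruction) predicates, so they are
++// only selected when the corresponding extension is available. For example, the
++// shadd rules fold "dst = src1 + (src2 << imm)" into one Zba shift-add
++// (sh1add/sh2add/sh3add) rather than separate shift and add instructions.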
-+instruct vlsrS(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP_DEF dst); -+ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" -+ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" -+ "vmnot.m v0, v0\n\t" -+ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ // if shift > BitsPerShort - 1, clear the element -+ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg), Assembler::v0_t); -+ // otherwise, shift -+ __ vmnot_m(v0, v0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg), Assembler::v0_t); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Shift Add Pointer ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL src2 imm))); + ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} + -+instruct vlsrI(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg_reg); +%} + ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); + -+instruct vlsrL(vReg dst, vReg src, vReg shift) %{ -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($shift$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} + -+instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) con = BitsPerByte - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) con = BitsPerShort - 1; -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), 
con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (RShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Shift Add Long ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL src2 imm))); + -+instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (RShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} + -+instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (URShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ if 
(con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); + -+instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (URShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con == 0) { -+ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ if (con < 32) { -+ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} + -+instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVB src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} + ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e8); -+ if (con >= BitsPerByte) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVS src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e16); -+ if (con >= BitsPerShort) { -+ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), -+ as_VectorRegister($src$$reg)); -+ return; -+ } -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ -+ match(Set dst (LShiftVI src shift)); -+ ins_cost(VEC_COST); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x1f; -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Zeros Count instructions ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosI src)); + -+instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ -+ predicate((n->in(2)->get_int() & 0x3f) < 64); -+ match(Set dst (LShiftVL src shift)); -+ ins_cost(VEC_COST); -+ effect(TEMP tmp); -+ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} -+ ins_encode %{ -+ uint32_t con = (unsigned)$shift$$constant & 0x3f; -+ __ vsetvli(t0, x0, Assembler::e64); -+ if (con < 
32) { -+ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); -+ } else { -+ __ li(t0, con); -+ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); -+ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); -+ } -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} + -+instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || -+ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} + -+instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ -+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); -+ match(Set dst (LShiftCntV cnt)); -+ match(Set dst (RShiftCntV cnt)); -+ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sqrt ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountTrailingZerosI src)); + -+instruct vsqrtF(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVF src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} + -+instruct vsqrtD(vReg dst, vReg src) %{ -+ match(Set dst (SqrtVD src)); -+ ins_cost(VEC_COST); -+ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ __ ctzw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+// vector sub ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst 
(CountTrailingZerosL src)); + -+instruct vsubB(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVB src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e8); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} + -+instruct vsubS(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVS src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e16); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); -+%} + -+instruct vsubI(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVI src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); ++ ins_pipe(ialu_reg); +%} + -+instruct vsubL(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVL src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++// Population Count instructions ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); + -+instruct vsubF(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVF src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} -+ ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e32); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); -+ %} -+ ins_pipe(pipe_slow); -+%} ++ ins_cost(ALU_COST); ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} + -+instruct vsubD(vReg dst, vReg src1, vReg src2) %{ -+ match(Set dst (SubVD src1 src2)); -+ ins_cost(VEC_COST); -+ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} + ins_encode %{ -+ __ vsetvli(t0, x0, Assembler::e64); -+ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), -+ as_VectorRegister($src2$$reg)); ++ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_slow); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// Note: Long/bitCount(long) returns an int. ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
-+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 1); ++ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, -+ iRegI_R10 result, vReg_V1 v1, -+ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (StrEquals (Binary str1 str2) cnt)); -+ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++// Max and Min ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MinI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} + -+ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} + ins_encode %{ -+ // Count is in 8-bit bytes; non-Compact chars are 16 bits. -+ __ string_equals_v($str1$$Register, $str2$$Register, -+ $result$$Register, $cnt$$Register, 2); ++ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); -+%} -+ -+instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} -+ ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 1); -+ %} -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} + -+instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) -+%{ -+ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result (AryEq ary1 ary2)); -+ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MaxI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} + -+ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} + ins_encode %{ -+ __ arrays_equals_v($ary1$$Register, $ary2$$Register, -+ $result$$Register, $tmp$$Register, 2); ++ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++// Abs ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsI src)); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} -+ ins_encode %{ -+ // Count is in 8-bit 
bytes; non-Compact chars are 16 bits. -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UU); ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "negw t0, $src\n\t" ++ "max $dst, $src, t0\t#@absI_reg_b" + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LL); ++ __ negw(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsL src)); + -+ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} -+ ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::UL); ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "neg t0, $src\n\t" ++ "max $dst, $src, t0\t#@absL_reg_b" + %} -+ ins_pipe(pipe_class_memory); -+%} -+instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, -+ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, -+ iRegP_R28 tmp1, iRegL_R29 tmp2) -+%{ -+ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); -+ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); -+ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, -+ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); + -+ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} + ins_encode %{ -+ __ string_compare_v($str1$$Register, $str2$$Register, -+ $cnt1$$Register, $cnt2$$Register, $result$$Register, -+ $tmp1$$Register, $tmp2$$Register, -+ StrIntrinsicNode::LU); ++ __ neg(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg); +%} + -+// fast byte[] to char[] inflation -+instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ 
match(Set dummy (StrInflatedCopy src (Binary dst len))); -+ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++// And Not ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} + -+ format %{ "String Inflate $src,$dst" %} + ins_encode %{ -+ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe(pipe_class_memory); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+// encode char[] to byte[] in ISO_8859_1 -+instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (EncodeISOArray src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} + -+ format %{ "Encode array $src,$dst,$len -> $result" %} + ins_encode %{ -+ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_class_memory ); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+// fast char[] to byte[] compression -+instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, -+ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (StrCompressedCopy src (Binary dst len))); -+ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, -+ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++// Or Not ++instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} + -+ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} + ins_encode %{ -+ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, -+ $result$$Register, $tmp$$Register); ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); ++ ++ ins_pipe(ialu_reg_reg); +%} + -+instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp) -+%{ -+ predicate(UseRVV); -+ match(Set result (HasNegatives ary1 len)); -+ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); -+ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ + ins_encode %{ -+ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); -+ if (tpc == NULL) { -+ ciEnv::current()->record_failure("CodeCache is full"); -+ return; -+ } ++ __ orn(as_Register($dst$$reg), ++ 
as_Register($src1$$reg), ++ as_Register($src2$$reg)); + %} -+ ins_pipe( pipe_slow ); -+%} + -+// clearing of an array -+instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, -+ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) -+%{ -+ predicate(UseRVV); -+ match(Set dummy (ClearArray cnt base)); -+ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ ins_pipe(ialu_reg_reg); + -+ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++%} + ++// AndI 0b0..010..0 + ConvI2B ++instruct convI2Bool_andI_reg_immIpowerOf2(iRegINoSp dst, iRegIorL2I src, immIpowerOf2 mask) %{ ++ predicate(UseZbs); ++ match(Set dst (Conv2B (AndI src mask))); ++ ins_cost(ALU_COST); ++ ++ format %{ "bexti $dst, $src, $mask\t#@convI2Bool_andI_reg_immIpowerOf2" %} + ins_encode %{ -+ __ clear_array_v($base$$Register, $cnt$$Register); ++ __ bexti($dst$$Register, $src$$Register, exact_log2((juint)($mask$$constant))); + %} + -+ ins_pipe(pipe_class_memory); ++ ins_pipe(ialu_reg_reg); +%} +\ No newline at end of file diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp new file mode 100644 -index 000000000..9922ff4cf +index 0000000000..f41a496093 --- /dev/null +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp -@@ -0,0 +1,2738 @@ +@@ -0,0 +1,2666 @@ +/* -+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -40637,15 +38433,24 @@ index 000000000..9922ff4cf +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" ++#include "compiler/oopMap.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" +#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" +#include "vmreg_riscv.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" @@ -40678,9 +38483,8 @@ index 000000000..9922ff4cf +}; + +class RegisterSaver { -+ const bool _save_vectors; + public: -+ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ RegisterSaver() {} + ~RegisterSaver() {} + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); @@ -40689,11 +38493,7 @@ index 000000000..9922ff4cf + // Used by deoptimization when it is managing result register + // values on its own + // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) -+ // |---v0---|<---SP -+ // |---v1---|save vectors only in generate_handler_blob -+ // |-- .. 
--| -+ // |---v31--|----- -+ // |---f0---| ++ // |---f0---|<---SP + // |---f1---| + // | .. | + // |---f31--| @@ -40704,16 +38504,8 @@ index 000000000..9922ff4cf + // |---x31--| + // |---fp---| + // |---ra---| -+ int v0_offset_in_bytes(void) { return 0; } + int f0_offset_in_bytes(void) { -+ int f0_offset = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * -+ BytesPerInt; -+ } -+#endif -+ return f0_offset; ++ return 0; + } + int reserved_slot_offset_in_bytes(void) { + return f0_offset_in_bytes() + @@ -40723,7 +38515,7 @@ index 000000000..9922ff4cf + } + + int reg_offset_in_bytes(Register r) { -+ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ assert (r->encoding() > 4, "ra, sp, gp and tp not saved"); + return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; + } + @@ -40737,22 +38529,10 @@ index 000000000..9922ff4cf + RegisterImpl::max_slots_per_register * + BytesPerInt; + } -+ -+ // During deoptimization only the result registers need to be restored, -+ // all the other values have already been extracted. -+ void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { -+ int vector_size_in_bytes = 0; -+ int vector_size_in_slots = 0; -+#ifdef COMPILER2 -+ if (_save_vectors) { -+ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); -+ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); -+ } -+#endif -+ ++ assert_cond(masm != NULL && total_frame_words != NULL); + int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; @@ -40762,9 +38542,9 @@ index 000000000..9922ff4cf + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + -+ // Save Integer, Float and Vector registers. ++ // Save Integer and Float registers. + __ enter(); -+ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This @@ -40777,13 +38557,6 @@ index 000000000..9922ff4cf + + int sp_offset_in_slots = 0; + int step_in_slots = 0; -+ if (_save_vectors) { -+ step_in_slots = vector_size_in_slots; -+ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { -+ VectorRegister r = as_VectorRegister(i); -+ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); -+ } -+ } + + step_in_slots = FloatRegisterImpl::max_slots_per_register; + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { @@ -40807,46 +38580,40 @@ index 000000000..9922ff4cf +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { -+#ifdef COMPILER2 -+ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); -+#else -+ __ pop_CPU_state(_save_vectors); -+#endif ++ assert_cond(masm != NULL); ++ __ pop_CPU_state(); + __ leave(); +} + -+void RegisterSaver::restore_result_registers(MacroAssembler* masm) { -+ // Just restore result register. Only used by deoptimization. 
By -+ // now any callee save register that needs to be restored to a c2 -+ // caller of the deoptee has been extracted into the vframeArray -+ // and will be stuffed into the c2i adapter we create for later -+ // restoration so only result registers need to be restored here. -+ // Restore fp result register -+ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); -+ // Restore integer result register -+ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); -+ -+ // Pop all of the register save are off the stack -+ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); -+} -+ +// Is vector's size (in bytes) bigger than a size saved by default? -+// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. +bool SharedRuntime::is_wide_vector(int size) { -+ return UseRVV; ++ return false; +} + +size_t SharedRuntime::trampoline_size() { -+ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction -+ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr ++ return 6 * NativeInstruction::instruction_size; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + int32_t offset = 0; -+ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli ++ __ movptr_with_offset(t0, destination, offset); + __ jalr(x0, t0, offset); +} + ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte @@ -40871,7 +38638,6 @@ index 000000000..9922ff4cf + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { -+ assert_cond(sig_bt != NULL && regs != NULL); + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { @@ -40945,6 +38711,7 @@ index 000000000..9922ff4cf + +// Patch the callers callsite with entry to compiled code if it exists. 
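A quick worked example of the slot arithmetic in the reg2offset_in/reg2offset_out helpers above may help; the sketch below is standalone C++, not HotSpot code, and the 4-byte stack slot size and zero out-preserve area are assumptions for illustration only.

// Standalone sketch (not part of the patch): byte offsets produced by the
// reg2offset_in/reg2offset_out pattern above for a value parked in a caller stack slot.
#include <cstdio>

static const int kStackSlotSize = 4;          // assumed VMRegImpl::stack_slot_size
static const int kOutPreserveStackSlots = 0;  // assumed SharedRuntime::out_preserve_stack_slots()

static int reg2offset_in_model(int slot)  { return slot * kStackSlotSize; }
static int reg2offset_out_model(int slot) { return (slot + kOutPreserveStackSlots) * kStackSlotSize; }

int main() {
  // Incoming slot 6 is read at fp + 24; the same value written outgoing lands at sp + 24.
  std::printf("in: %d bytes, out: %d bytes\n", reg2offset_in_model(6), reg2offset_out_model(6));
  return 0;
}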
+static void patch_callers_callsite(MacroAssembler *masm) { ++ assert_cond(masm != NULL); + Label L; + __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); + __ beqz(t0, L); @@ -40966,6 +38733,7 @@ index 000000000..9922ff4cf + int32_t offset = 0; + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); + __ jalr(x1, t0, offset); ++ + __ pop_CPU_state(); + // restore sp + __ leave(); @@ -41052,7 +38820,7 @@ index 000000000..9922ff4cf + __ sd(t0, Address(sp, next_off), /*temp register*/esp); +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaaaul); ++ __ li(t0, 0xdeadffffdeadaaaaul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + } else { @@ -41068,10 +38836,10 @@ index 000000000..9922ff4cf + // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { -+ // jlong/double in gpr ++ // long/double in gpr +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaabul); ++ __ li(t0, 0xdeadffffdeadaaabul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ sd(r, Address(sp, next_off)); @@ -41087,7 +38855,7 @@ index 000000000..9922ff4cf + } else { +#ifdef ASSERT + // Overwrite the unused slot with known junk -+ __ mv(t0, 0xdeadffffdeadaaacul); ++ __ li(t0, 0xdeadffffdeadaaacul); + __ sd(t0, Address(sp, st_off), /*temp register*/esp); +#endif /* ASSERT */ + __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); @@ -41265,7 +39033,6 @@ index 000000000..9922ff4cf + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on riscv"); -+ assert_cond(sig_bt != NULL && regs != NULL); + + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. @@ -41343,7 +39110,190 @@ index 000000000..9922ff4cf + return stk_args; +} + ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(masm != NULL && map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? 
t1 : dst.first()->as_Register(); ++ ++ // See if oop is NULL if it is we need no handle ++ ++ if (src.first()->is_stack()) { ++ ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ __ bnez(t0, notZero1); ++ __ mv(rHandle, zr); ++ __ bind(notZero1); ++ } else { ++ ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } ++ ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ __ beqz(rOop, isZero); ++ __ la(rHandle, Address(sp, offset)); ++ __ bind(isZero); ++ } else { ++ Label notZero2; ++ __ la(rHandle, Address(sp, offset)); ++ __ bnez(rOop, notZero2); ++ __ mv(rHandle, zr); ++ __ bind(notZero2); ++ } ++ } ++ ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
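The handle passing that object_move above arranges can be pictured with a small standalone model; the names below are made up for illustration, the register/stack distinctions are ignored, and only the null-versus-non-null rule is shown.

// Minimal model (not JVM code): a JNI handle is the address of the slot that
// holds the oop, or NULL when the oop itself is NULL.
#include <cstdio>
#include <cstdint>

typedef intptr_t oop_t;      // stand-in for an oop
typedef oop_t*   handle_t;   // stand-in for the jobject handed to native code

static handle_t to_handle(oop_t* slot) {
  return (*slot == 0) ? (handle_t)0 : slot;   // same conditional-NULL rule as above
}

int main() {
  oop_t reserved_slot = 0x1000;   // oop spilled into the reserved handle area
  oop_t null_slot     = 0;
  std::printf("non-null -> %p, null -> %p\n",
              (void*)to_handle(&reserved_slot), (void*)to_handle(&null_slot));
  return 0;
}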
++ if (dst.first()->is_stack()) { ++ __ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lwu(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sw(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ assert_cond(masm != NULL); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ __ sd(t0, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41361,6 +39311,7 @@ index 000000000..9922ff4cf +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ assert_cond(masm != NULL); + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { @@ -41378,6 +39329,7 @@ index 000000000..9922ff4cf +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if 
(args[i].first()->is_Register()) { @@ -41391,6 +39343,7 @@ index 000000000..9922ff4cf +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ assert_cond(masm != NULL && args != NULL); + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { @@ -41410,85 +39363,17 @@ index 000000000..9922ff4cf + } +} + -+// Check GCLocker::needs_gc and enter the runtime if it's true. This -+// keeps a new JNI critical region from starting until a GC has been -+// forced. Save down any oops in registers and describe them in an -+// OopMap. -+static void check_needs_gc_for_critical_native(MacroAssembler* masm, -+ int stack_slots, -+ int total_c_args, -+ int total_in_args, -+ int arg_save_area, -+ OopMapSet* oop_maps, -+ VMRegPair* in_regs, -+ BasicType* in_sig_bt) { Unimplemented(); } -+ -+// Unpack an array argument into a pointer to the body and the length -+// if the array is non-null, otherwise pass 0 for both. -+static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } -+ -+class ComputeMoveOrder: public StackObj { -+ class MoveOperation: public ResourceObj { -+ friend class ComputeMoveOrder; -+ private: -+ VMRegPair _src; -+ VMRegPair _dst; -+ int _src_index; -+ int _dst_index; -+ bool _processed; -+ MoveOperation* _next; -+ MoveOperation* _prev; -+ -+ static int get_id(VMRegPair r) { Unimplemented(); return 0; } -+ -+ public: -+ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): -+ _src(src) -+ , _dst(dst) -+ , _src_index(src_index) -+ , _dst_index(dst_index) -+ , _processed(false) -+ , _next(NULL) -+ , _prev(NULL) { Unimplemented(); } -+ -+ ~MoveOperation() { -+ _next = NULL; -+ _prev = NULL; -+ } -+ -+ VMRegPair src() const { Unimplemented(); return _src; } -+ int src_id() const { Unimplemented(); return 0; } -+ int src_index() const { Unimplemented(); return 0; } -+ VMRegPair dst() const { Unimplemented(); return _src; } -+ void set_dst(int i, VMRegPair dst) { Unimplemented(); } -+ int dst_index() const { Unimplemented(); return 0; } -+ int dst_id() const { Unimplemented(); return 0; } -+ MoveOperation* next() const { Unimplemented(); return 0; } -+ MoveOperation* prev() const { Unimplemented(); return 0; } -+ void set_processed() { Unimplemented(); } -+ bool is_processed() const { Unimplemented(); return 0; } -+ -+ // insert -+ void break_cycle(VMRegPair temp_register) { Unimplemented(); } -+ -+ void link(GrowableArray& killer) { Unimplemented(); } -+ }; -+ -+ private: -+ GrowableArray edges; -+ -+ public: -+ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, -+ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } -+ -+ ~ComputeMoveOrder() {} -+ // Collected all the move operations -+ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } -+ -+ // Walk the edges breaking cycles between moves. 
The result list -+ // can be walked in order to produce the proper set of loads -+ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } -+}; ++static void rt_call(MacroAssembler* masm, address dest) { ++ assert_cond(masm != NULL); ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++} + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle& method, @@ -41630,12 +39515,7 @@ index 000000000..9922ff4cf + in_ByteSize(-1), + (OopMapSet*)NULL); + } -+ bool is_critical_native = true; -+ address native_func = critical_entry; -+ if (native_func == NULL) { -+ native_func = method->native_function(); -+ is_critical_native = false; -+ } ++ address native_func = method->native_function(); + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) @@ -41650,70 +39530,20 @@ index 000000000..9922ff4cf + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); -+ int total_c_args = total_in_args; -+ if (!is_critical_native) { -+ total_c_args += 1; -+ if (method->is_static()) { -+ total_c_args++; -+ } -+ } else { -+ for (int i = 0; i < total_in_args; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ total_c_args++; -+ } -+ } -+ } ++ int total_c_args = total_in_args + (method->is_static() ? 2 : 1); + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); -+ assert_cond(out_sig_bt != NULL && out_regs != NULL); + BasicType* in_elem_bt = NULL; + + int argc = 0; -+ if (!is_critical_native) { -+ out_sig_bt[argc++] = T_ADDRESS; -+ if (method->is_static()) { -+ out_sig_bt[argc++] = T_OBJECT; -+ } ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } + -+ for (int i = 0; i < total_in_args ; i++) { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ } -+ } else { -+ Thread* THREAD = Thread::current(); -+ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); -+ assert_cond(in_elem_bt != NULL); -+ SignatureStream ss(method->signature()); -+ for (int i = 0; i < total_in_args ; i++) { -+ if (in_sig_bt[i] == T_ARRAY) { -+ // Arrays are passed as int, elem* pair -+ out_sig_bt[argc++] = T_INT; -+ out_sig_bt[argc++] = T_ADDRESS; -+ Symbol* atype = ss.as_symbol(CHECK_NULL); -+ const char* at = atype->as_C_string(); -+ if (strlen(at) == 2) { -+ assert(at[0] == '[', "must be"); -+ switch (at[1]) { -+ case 'B': in_elem_bt[i] = T_BYTE; break; -+ case 'C': in_elem_bt[i] = T_CHAR; break; -+ case 'D': in_elem_bt[i] = T_DOUBLE; break; -+ case 'F': in_elem_bt[i] = T_FLOAT; break; -+ case 'I': in_elem_bt[i] = T_INT; break; -+ case 'J': in_elem_bt[i] = T_LONG; break; -+ case 'S': in_elem_bt[i] = T_SHORT; break; -+ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; -+ default: ShouldNotReachHere(); -+ } -+ } -+ } else { -+ out_sig_bt[argc++] = in_sig_bt[i]; -+ in_elem_bt[i] = T_VOID; -+ } -+ if (in_sig_bt[i] != T_VOID) { -+ assert(in_sig_bt[i] == ss.type(), "must match"); -+ ss.next(); -+ } -+ } ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; + } + + // Now figure out where the args must be stored and how much stack space @@ -41730,34 +39560,6 @@ index 000000000..9922ff4cf + + // Now the space for the inbound oop handle area + int total_save_slots = 8 * 
VMRegImpl::slots_per_word; // 8 arguments passed in registers -+ if (is_critical_native) { -+ // Critical natives may have to call out so they need a save area -+ // for register arguments. -+ int double_slots = 0; -+ int single_slots = 0; -+ for ( int i = 0; i < total_in_args; i++) { -+ if (in_regs[i].first()->is_Register()) { -+ const Register reg = in_regs[i].first()->as_Register(); -+ switch (in_sig_bt[i]) { -+ case T_BOOLEAN: -+ case T_BYTE: -+ case T_SHORT: -+ case T_CHAR: -+ case T_INT: single_slots++; break; -+ case T_ARRAY: // specific to LP64 (7145024) -+ case T_LONG: double_slots++; break; -+ default: ShouldNotReachHere(); -+ } -+ } else if (in_regs[i].first()->is_FloatRegister()) { -+ ShouldNotReachHere(); -+ } -+ } -+ total_save_slots = double_slots * 2 + single_slots; -+ // align the save area -+ if (double_slots != 0) { -+ stack_slots = align_up(stack_slots, 2); -+ } -+ } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; @@ -41849,11 +39651,7 @@ index 000000000..9922ff4cf + __ nop(); + + // Generate stack overflow check -+ if (UseStackBanging) { -+ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); -+ } else { -+ Unimplemented(); -+ } ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); + + // Generate a new frame for the wrapper. + __ enter(); @@ -41868,11 +39666,6 @@ index 000000000..9922ff4cf + + const Register oop_handle_reg = x18; + -+ if (is_critical_native) { -+ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, -+ oop_handle_offset, oop_maps, in_regs, in_sig_bt); -+ } -+ + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have @@ -41917,22 +39710,14 @@ index 000000000..9922ff4cf + +#endif /* ASSERT */ + -+ // This may iterate in two different directions depending on the -+ // kind of native it is. The reason is that for regular JNI natives -+ // the incoming and outgoing registers are offset upwards and for -+ // critical natives they are offset down. ++ // For JNI natives the incoming and outgoing registers are offset upwards. 
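The index pairing built just below is easiest to see with concrete numbers; the following standalone snippet replays the same loop for a hypothetical static native with three Java parameters (the JNIEnv* and class mirror account for the two extra C slots, per total_c_args above).

// Standalone illustration of the (java index -> C index) pairs pushed below.
#include <cstdio>
#include <vector>

int main() {
  const int total_in_args = 3;                  // hypothetical Java parameter count
  const int total_c_args  = total_in_args + 2;  // static: + JNIEnv* + class mirror
  std::vector<int> arg_order;
  for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
    arg_order.push_back(i);
    arg_order.push_back(c_arg);
  }
  // Prints "2->4 1->3 0->2": each C slot sits above its Java source, so walking
  // from the last argument down never overwrites a register that is still pending.
  for (size_t k = 0; k < arg_order.size(); k += 2) {
    std::printf("%d->%d ", arg_order[k], arg_order[k + 1]);
  }
  std::printf("\n");
  return 0;
}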
+ GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(x9->as_VMReg()); + -+ if (!is_critical_native) { -+ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { -+ arg_order.push(i); -+ arg_order.push(c_arg); -+ } -+ } else { -+ // Compute a valid move order, using tmp_vmreg to break any cycles -+ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); + } + + int temploc = -1; @@ -41940,20 +39725,7 @@ index 000000000..9922ff4cf + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("mv %d -> %d", i, c_arg)); -+ if (c_arg == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // This arg needs to be moved to a temporary -+ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); -+ in_regs[i] = tmp_vmreg; -+ temploc = i; -+ continue; -+ } else if (i == -1) { -+ assert(is_critical_native, "should only be required for critical natives"); -+ // Read from the temporary location -+ assert(temploc != -1, "must be valid"); -+ i = temploc; -+ temploc = -1; -+ } ++ assert(c_arg != -1 && i != -1, "wrong order"); +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); @@ -41968,32 +39740,17 @@ index 000000000..9922ff4cf +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: -+ if (is_critical_native) { -+ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); -+ c_arg++; -+#ifdef ASSERT -+ if (out_regs[c_arg].first()->is_Register()) { -+ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; -+ } else if (out_regs[c_arg].first()->is_FloatRegister()) { -+ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; -+ } -+#endif -+ int_args++; -+ break; -+ } -+ // no break + case T_OBJECT: -+ assert(!is_critical_native, "no oop arguments"); -+ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], -+ ((i == 0) && (!is_static)), -+ &receiver_offset); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); + int_args++; + break; + case T_VOID: + break; + + case T_FLOAT: -+ __ float_move(in_regs[i], out_regs[c_arg]); ++ float_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + @@ -42001,12 +39758,12 @@ index 000000000..9922ff4cf + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); -+ __ double_move(in_regs[i], out_regs[c_arg]); ++ double_move(masm, in_regs[i], out_regs[c_arg]); + float_args++; + break; + + case T_LONG : -+ __ long_move(in_regs[i], out_regs[c_arg]); ++ long_move(masm, in_regs[i], out_regs[c_arg]); + int_args++; + break; + @@ -42015,7 +39772,7 @@ index 000000000..9922ff4cf + break; + + default: -+ __ move32_64(in_regs[i], out_regs[c_arg]); ++ move32_64(masm, in_regs[i], out_regs[c_arg]); + int_args++; + } + } @@ -42025,7 +39782,7 @@ index 000000000..9922ff4cf + int c_arg = total_c_args - total_in_args; + + // Pre-load a static method's oop into c_rarg1. 
-+ if (method->is_static() && !is_critical_native) { ++ if (method->is_static()) { + + // load oop into a register + __ movoop(c_rarg1, @@ -42084,7 +39841,6 @@ index 000000000..9922ff4cf + Label lock_done; + + if (method->is_synchronized()) { -+ assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + @@ -42132,7 +39888,6 @@ index 000000000..9922ff4cf + __ bnez(swap_reg, slow_path_lock); + + // Slow path will re-enter here -+ + __ bind(lock_done); + } + @@ -42140,9 +39895,7 @@ index 000000000..9922ff4cf + // Finally just about ready to make the JNI call + + // get JNIEnv* which is first argument to native -+ if (!is_critical_native) { -+ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); -+ } ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); + + // Now set thread in native + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); @@ -42150,7 +39903,7 @@ index 000000000..9922ff4cf + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); + __ sw(t0, Address(t1)); + -+ __ rt_call(native_func); ++ rt_call(masm, native_func); + + __ bind(native_return); + @@ -42158,10 +39911,13 @@ index 000000000..9922ff4cf + oop_maps->add_gc_map(return_pc - start, map); + + // Unpack native results. -+ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ if (ret_type != T_OBJECT && ret_type != T_ARRAY) { + __ cast_primitive_type(ret_type, x10); + } + ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ Label after_transition; ++ + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -42171,29 +39927,12 @@ index 000000000..9922ff4cf + // didn't see any synchronization is progress, and escapes. + __ mv(t0, _thread_in_native_trans); + -+ if(os::is_MP()) { -+ if (UseMembar) { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); -+ -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); -+ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); -+ __ sw(t0, Address(t1)); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, x12, t0); -+ } -+ } else { -+ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests -+ Label safepoint_in_progress, safepoint_in_progress_done; + { + __ safepoint_poll_acquire(safepoint_in_progress); + __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); @@ -42202,7 +39941,6 @@ index 000000000..9922ff4cf + } + + // change thread state -+ Label after_transition; + __ la(t1, Address(xthread, JavaThread::thread_state_offset())); + __ mv(t0, _thread_in_Java); + __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); @@ -42233,7 +39971,6 @@ index 000000000..9922ff4cf + } + + // Simple recursive lock? 
-+ + __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ beqz(t0, done); + @@ -42242,7 +39979,6 @@ index 000000000..9922ff4cf + save_native_result(masm, ret_type, stack_slots); + } + -+ + // get address of the stack lock + __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header @@ -42274,7 +40010,7 @@ index 000000000..9922ff4cf + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. -+ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ if (is_reference_type(ret_type)) { + __ resolve_jobject(x10, xthread, t1); + } + @@ -42283,32 +40019,26 @@ index 000000000..9922ff4cf + __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + -+ if (!is_critical_native) { -+ // reset handle block -+ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); -+ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); -+ } ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); + + __ leave(); + -+ if (!is_critical_native) { -+ // Any exception pending? -+ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); -+ __ bnez(t0, exception_pending); -+ } ++ // Any exception pending? ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); + + // We're done + __ ret(); + + // Unexpected paths are out of line and go here + -+ if (!is_critical_native) { -+ // forward the exception -+ __ bind(exception_pending); ++ // forward the exception ++ __ bind(exception_pending); + -+ // and forward the exception -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ } ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // Slow path locking & unlocking + if (method->is_synchronized()) { @@ -42345,7 +40075,7 @@ index 000000000..9922ff4cf + __ block_comment("Slow path unlock {"); + __ bind(slow_path_unlock); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + save_native_result(masm, ret_type, stack_slots); + } + @@ -42358,7 +40088,7 @@ index 000000000..9922ff4cf + __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + +#ifdef ASSERT + { @@ -42372,7 +40102,7 @@ index 000000000..9922ff4cf + + __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); + -+ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE) { + restore_native_result(masm, ret_type, stack_slots); + } + __ j(unlock_done); @@ -42385,7 +40115,7 @@ index 000000000..9922ff4cf + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); -+ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ j(reguard_done); @@ -42404,21 +40134,12 @@ index 000000000..9922ff4cf + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + int32_t offset = 0; -+ 
if (!is_critical_native) { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); -+ } else { -+ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); -+ } ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); + __ jalr(x1, t0, offset); ++ + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + -+ if (is_critical_native) { -+ // The call above performed the transition to thread_in_Java so -+ // skip the transition logic above. -+ __ j(after_transition); -+ } -+ + __ j(safepoint_in_progress_done); + __ block_comment("} safepoint"); + } @@ -42466,10 +40187,6 @@ index 000000000..9922ff4cf + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + assert(nm != NULL, "create native nmethod fail!"); -+ if (is_critical_native) { -+ nm->set_lazy_critical_native(true); -+ } -+ + return nm; +} + @@ -42498,7 +40215,7 @@ index 000000000..9922ff4cf + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(masm != NULL && oop_maps != NULL); -+ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ RegisterSaver reg_saver; + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return @@ -42590,7 +40307,7 @@ index 000000000..9922ff4cf + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. -+ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread @@ -42651,7 +40368,7 @@ index 000000000..9922ff4cf + + __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + Label noException; -+ __ mv(t0, Deoptimization::Unpack_exception); ++ __ li(t0, Deoptimization::Unpack_exception); + __ bne(xcpool, t0, noException); // Was exception pending? + __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); + __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); @@ -42668,7 +40385,14 @@ index 000000000..9922ff4cf + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. -+ reg_saver.restore_result_registers(masm); ++ ++ // Restore fp result register ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ // Pop all of the register save area off the stack ++ __ add(sp, sp, frame_size_in_words * wordSize); + + // All of the register save area has been popped of the stack. Only the + // return address remains. @@ -42697,10 +40421,8 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. 
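For readers unfamiliar with the term, stack banging means probing one word per page across the space the new interpreter frames will need, which forces any stack overflow to surface at a well-defined point; a rough standalone model follows (4 KiB pages assumed, nothing here is HotSpot code).

// Rough model of stack banging: touch one word per page over the frame size.
#include <cstddef>

static void bang_stack_model(volatile char* sp, size_t frame_bytes, size_t page = 4096) {
  for (size_t off = page; off <= frame_bytes + page; off += page) {
    *(sp - off) = 0;   // would fault on the guard page if the frames don't fit
  }
}

int main() {
  static char fake_stack[64 * 1024];
  bang_stack_model(fake_stack + sizeof(fake_stack), 16 * 1024);  // probes 5 pages downward
  return 0;
}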
-+ if (UseStackBanging) { -+ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x9, x12); -+ } ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); +#endif + // Load address of array of frame pcs into x12 + __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); @@ -42725,7 +40447,7 @@ index 000000000..9922ff4cf + __ sub(sp, sp, x9); + + // Push interpreter frames in a loop -+ __ mv(t0, (uint64_t)0xDEADDEAD); // Make a recognizable pattern ++ __ li(t0, 0xDEADDEAD); // Make a recognizable pattern + __ mv(t1, t0); + Label loop; + __ bind(loop); @@ -42775,7 +40497,7 @@ index 000000000..9922ff4cf + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, -+ new OopMap( frame_size_in_words, 0 )); ++ new OopMap(frame_size_in_words, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); @@ -42901,12 +40623,10 @@ index 000000000..9922ff4cf + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. -+ if (UseStackBanging) { -+ __ lwu(x11, Address(x14, -+ Deoptimization::UnrollBlock:: -+ total_frame_sizes_offset_in_bytes())); -+ __ bang_stack_size(x11, x12); -+ } ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); +#endif + + // Load address of array of frame pcs into x12 (address*) @@ -43019,7 +40739,7 @@ index 000000000..9922ff4cf + address call_pc = NULL; + int frame_size_in_words = -1; + bool cause_return = (poll_type == POLL_AT_RETURN); -+ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ RegisterSaver reg_saver; + + // Save Integer and Float registers. + map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); @@ -43128,7 +40848,7 @@ index 000000000..9922ff4cf +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { -+ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; @@ -43138,7 +40858,7 @@ index 000000000..9922ff4cf + assert_cond(masm != NULL); + + int frame_size_in_words = -1; -+ RegisterSaver reg_saver(false /* save_vectors */); ++ RegisterSaver reg_saver; + + OopMapSet *oop_maps = new OopMapSet(); + assert_cond(oop_maps != NULL); @@ -43290,6 +41010,10 @@ index 000000000..9922ff4cf + __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); + __ jalr(x1, t0, offset); + ++ ++ // handle_exception_C is a special VM call which does not require an explicit ++ // instruction sync afterwards. ++ + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. 
+ // Callee-saved registers will be the same as the frame above (i.e., @@ -43345,14 +41069,14 @@ index 000000000..9922ff4cf +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp new file mode 100644 -index 000000000..c5b3b094c +index 0000000000..9970229c5c --- /dev/null +++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp @@ -0,0 +1,3743 @@ +/* + * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -43378,9 +41102,11 @@ index 000000000..c5b3b094c +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" ++#include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" ++#include "memory/universe.hpp" +#include "nativeInst_riscv.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" @@ -43397,7 +41123,9 @@ index 000000000..c5b3b094c +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif -+ ++#if INCLUDE_ZGC ++#include "gc/z/zThreadLocalData.hpp" ++#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure @@ -43505,8 +41233,8 @@ index 000000000..c5b3b094c + // -5 [ parameters (x15) ] + // -4 [ parameter size (x16) ] + // -3 [ thread (x17) ] -+ // -2 [ saved fp (x8) ] -+ // -1 [ saved ra (x1) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] + // 0 [ ] <--- fp == saved sp (x2) + + // Call stub stack layout word offsets from fp @@ -43539,15 +41267,15 @@ index 000000000..c5b3b094c + x9_off = -11, + + call_wrapper_off = -10, -+ result_off = -9, -+ result_type_off = -8, -+ method_off = -7, -+ entry_point_off = -6, -+ parameters_off = -5, -+ parameter_size_off = -4, -+ thread_off = -3, -+ fp_f = -2, -+ retaddr_off = -1, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, + }; + + address generate_call_stub(address& return_address) { @@ -43701,13 +41429,13 @@ index 000000000..c5b3b094c + __ ld(j_rarg2, result); + Label is_long, is_float, is_double, exit; + __ ld(j_rarg1, result_type); -+ __ mv(t0, (u1)T_OBJECT); ++ __ li(t0, (u1)T_OBJECT); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_LONG); ++ __ li(t0, (u1)T_LONG); + __ beq(j_rarg1, t0, is_long); -+ __ mv(t0, (u1)T_FLOAT); ++ __ li(t0, (u1)T_FLOAT); + __ beq(j_rarg1, t0, is_float); -+ __ mv(t0, (u1)T_DOUBLE); ++ __ li(t0, (u1)T_DOUBLE); + __ beq(j_rarg1, t0, is_double); + + // handle T_INT case @@ -43945,7 +41673,7 @@ index 000000000..c5b3b094c + + Label exit, error; + -+ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ __ push_reg(0x3000, sp); // save c_rarg2 and c_rarg3 + + __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ld(c_rarg3, Address(c_rarg2)); @@ -43961,7 +41689,7 @@ index 000000000..c5b3b094c + __ andr(c_rarg2, x10, c_rarg3); + __ mv(c_rarg3, (intptr_t) 
Universe::verify_oop_bits()); + -+ // Compare c_rarg2 and c_rarg3 ++ // Compare c_rarg2 and c_rarg3. + __ bne(c_rarg2, c_rarg3, error); + + // make sure klass is 'reasonable', which is not zero. @@ -43971,16 +41699,15 @@ index 000000000..c5b3b094c + // return if everything seems ok + __ bind(exit); + -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + __ ret(); + + // handle errors + __ bind(error); -+ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ pop_reg(0x3000, sp); // pop c_rarg2 and c_rarg3 + -+ __ push_reg(RegSet::range(x0, x31), sp); -+ // prepare parameters for debug64, c_rarg0: address of error message, -+ // c_rarg1: return address, c_rarg2: address of regs on stack ++ __ pusha(); ++ // debug(char* msg, int64_t pc, int64_t regs[]) + __ mv(c_rarg0, t0); // pass address of error message + __ mv(c_rarg1, ra); // pass return address + __ mv(c_rarg2, sp); // pass address of regs on stack @@ -43991,6 +41718,7 @@ index 000000000..c5b3b094c + int32_t offset = 0; + __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); + __ jalr(x1, t0, offset); ++ __ ebreak(); + + return start; + } @@ -44036,42 +41764,276 @@ index 000000000..c5b3b094c + return start; + } + -+ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ typedef enum { ++ copy_forwards = 1, ++ copy_backwards = -1 ++ } copy_direction; ++ ++ // Bulk copy of blocks of 8 words. ++ // ++ // count is a count of words. ++ // ++ // Precondition: count >= 8 ++ // ++ // Postconditions: ++ // ++ // The least significant bit of count contains the remaining count ++ // of words to copy. The rest of count is trash. ++ // ++ // s and d are adjusted to point to the remaining words to copy ++ // ++ void generate_copy_longs(Label &start, Register s, Register d, Register count, ++ copy_direction direction) { ++ int unit = wordSize * direction; ++ int bias = wordSize; ++ ++ const Register tmp_reg0 = x13, tmp_reg1 = x14, tmp_reg2 = x15, tmp_reg3 = x16, ++ tmp_reg4 = x17, tmp_reg5 = x7, tmp_reg6 = x28, tmp_reg7 = x29; ++ ++ const Register stride = x30; ++ ++ assert_different_registers(t0, tmp_reg0, tmp_reg1, tmp_reg2, tmp_reg3, ++ tmp_reg4, tmp_reg5, tmp_reg6, tmp_reg7); ++ assert_different_registers(s, d, count, t0); ++ ++ Label again, drain; ++ const char* stub_name = NULL; ++ if (direction == copy_forwards) { ++ stub_name = "forward_copy_longs"; ++ } else { ++ stub_name = "backward_copy_longs"; ++ } ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ __ align(CodeEntryAlignment); ++ __ bind(start); ++ ++ if (direction == copy_forwards) { ++ __ sub(s, s, bias); ++ __ sub(d, d, bias); ++ } ++ ++#ifdef ASSERT ++ // Make sure we are never given < 8 words ++ { ++ Label L; + -+ void copy_by_step(RegSet tmp_regs, Register src, Register dst, -+ unsigned unroll_factor, int unit) { -+ unsigned char regs[32]; -+ int offset = unit < 0 ? 
unit : 0; ++ __ li(t0, 8); ++ __ bge(count, t0, L); ++ __ stop("genrate_copy_longs called with < 8 words"); ++ __ bind(L); ++ } ++#endif + -+ // Scan bitset to get tmp regs -+ unsigned int regsSize = 0; -+ unsigned bitset = tmp_regs.bits(); ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ __ addi(s, s, 8 * unit); ++ ++ __ sub(count, count, 16); ++ __ bltz(count, drain); ++ ++ __ bind(again); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ ld(tmp_reg4, Address(s, 5 * unit)); ++ __ ld(tmp_reg5, Address(s, 6 * unit)); ++ __ ld(tmp_reg6, Address(s, 7 * unit)); ++ __ ld(tmp_reg7, Address(s, 8 * unit)); ++ ++ __ addi(s, s, 8 * unit); ++ __ addi(d, d, 8 * unit); ++ ++ __ sub(count, count, 8); ++ __ bgez(count, again); ++ ++ // Drain ++ __ bind(drain); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ sd(tmp_reg4, Address(d, 5 * unit)); ++ __ sd(tmp_reg5, Address(d, 6 * unit)); ++ __ sd(tmp_reg6, Address(d, 7 * unit)); ++ __ sd(tmp_reg7, Address(d, 8 * unit)); ++ __ addi(d, d, 8 * unit); + -+ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); -+ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ { ++ Label L1, L2; ++ __ andi(t0, count, 4); ++ __ beqz(t0, L1); + -+ for (int reg = 31; reg >= 0; reg--) { -+ if ((1U << 31) & bitset) { -+ regs[regsSize++] = reg; ++ __ ld(tmp_reg0, Address(s, 1 * unit)); ++ __ ld(tmp_reg1, Address(s, 2 * unit)); ++ __ ld(tmp_reg2, Address(s, 3 * unit)); ++ __ ld(tmp_reg3, Address(s, 4 * unit)); ++ __ addi(s, s, 4 * unit); ++ ++ __ sd(tmp_reg0, Address(d, 1 * unit)); ++ __ sd(tmp_reg1, Address(d, 2 * unit)); ++ __ sd(tmp_reg2, Address(d, 3 * unit)); ++ __ sd(tmp_reg3, Address(d, 4 * unit)); ++ __ addi(d, d, 4 * unit); ++ ++ __ bind(L1); ++ ++ if (direction == copy_forwards) { ++ __ addi(s, s, bias); ++ __ addi(d, d, bias); + } -+ bitset <<= 1; ++ ++ __ andi(t0, count, 2); ++ __ beqz(t0, L2); ++ if (direction == copy_backwards) { ++ __ addi(s, s, 2 * unit); ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(d, d, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ } else { ++ __ ld(tmp_reg0, Address(s)); ++ __ ld(tmp_reg1, Address(s, wordSize)); ++ __ addi(s, s, 2 * unit); ++ __ sd(tmp_reg0, Address(d)); ++ __ sd(tmp_reg1, Address(d, wordSize)); ++ __ addi(d, d, 2 * unit); ++ } ++ __ bind(L2); ++ } ++ ++ __ ret(); ++ } ++ ++ Label copy_f, copy_b; ++ ++ // All-singing all-dancing memory copy. ++ // ++ // Copy count units of memory from s to d. The size of a unit is ++ // step, which can be positive or negative depending on the direction ++ // of copy. 
If is_aligned is false, we align the source address. ++ // ++ /* ++ * if (is_aligned) { ++ * goto copy_8_bytes; ++ * } ++ * bool is_backwards = step < 0; ++ * int granularity = uabs(step); ++ * count = count * granularity; * count bytes ++ * ++ * if (is_backwards) { ++ * s += count; ++ * d += count; ++ * } ++ * ++ * count limit maybe greater than 16, for better performance ++ * if (count < 16) { ++ * goto copy_small; ++ * } ++ * ++ * if ((dst % 8) == (src % 8)) { ++ * aligned; ++ * goto copy8; ++ * } ++ * ++ * copy_small: ++ * load element one by one; ++ * done; ++ */ ++ ++ typedef void (MacroAssembler::*copy_insn)(Register Rd, const Address &adr, Register temp); ++ ++ void copy_memory_v(Register s, Register d, Register count, Register tmp, int step) { ++ bool is_backward = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, vl = x14, cnt = x15, tmp1 = x16, tmp2 = x17; ++ assert_different_registers(s, d, cnt, vl, tmp, tmp1, tmp2); ++ Assembler::SEW sew = Assembler::elembytes_to_sew(granularity); ++ Label loop_forward, loop_backward, done; ++ ++ __ mv(dst, d); ++ __ mv(src, s); ++ __ mv(cnt, count); ++ ++ __ bind(loop_forward); ++ __ vsetvli(vl, cnt, sew, Assembler::m8); ++ if (is_backward) { ++ __ bne(vl, cnt, loop_backward); ++ } ++ ++ __ vlex_v(v0, src, sew); ++ __ sub(cnt, cnt, vl); ++ __ slli(vl, vl, (int)sew); ++ __ add(src, src, vl); ++ ++ __ vsex_v(v0, dst, sew); ++ __ add(dst, dst, vl); ++ __ bnez(cnt, loop_forward); ++ ++ if (is_backward) { ++ __ j(done); ++ ++ __ bind(loop_backward); ++ __ sub(tmp, cnt, vl); ++ __ slli(tmp, tmp, sew); ++ __ add(tmp1, s, tmp); ++ __ vlex_v(v0, tmp1, sew); ++ __ add(tmp2, d, tmp); ++ __ vsex_v(v0, tmp2, sew); ++ __ sub(cnt, cnt, vl); ++ __ bnez(cnt, loop_forward); ++ __ bind(done); ++ } ++ } ++ ++ void copy_memory(bool is_aligned, Register s, Register d, ++ Register count, Register tmp, int step) { ++ if (UseRVV) { ++ return copy_memory_v(s, d, count, tmp, step); + } + ++ bool is_backwards = step < 0; ++ int granularity = uabs(step); ++ ++ const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17; ++ ++ Label same_aligned; ++ Label copy8, copy_small, done; ++ + copy_insn ld_arr = NULL, st_arr = NULL; -+ switch (abs(unit)) { ++ switch (granularity) { + case 1 : + ld_arr = (copy_insn)&MacroAssembler::lbu; + st_arr = (copy_insn)&MacroAssembler::sb; + break; -+ case BytesPerShort : ++ case 2 : + ld_arr = (copy_insn)&MacroAssembler::lhu; + st_arr = (copy_insn)&MacroAssembler::sh; + break; -+ case BytesPerInt : ++ case 4 : + ld_arr = (copy_insn)&MacroAssembler::lwu; + st_arr = (copy_insn)&MacroAssembler::sw; + break; -+ case BytesPerLong : ++ case 8 : + ld_arr = (copy_insn)&MacroAssembler::ld; + st_arr = (copy_insn)&MacroAssembler::sd; + break; @@ -44079,294 +42041,83 @@ index 000000000..c5b3b094c + ShouldNotReachHere(); + } + -+ for (unsigned i = 0; i < unroll_factor; i++) { -+ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); -+ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); ++ __ beqz(count, done); ++ __ slli(cnt, count, exact_log2(granularity)); ++ if (is_backwards) { ++ __ add(src, s, cnt); ++ __ add(dst, d, cnt); ++ } else { ++ __ mv(src, s); ++ __ mv(dst, d); + } + -+ __ addi(src, src, unroll_factor * unit); -+ __ addi(dst, dst, unroll_factor * unit); -+ } -+ -+ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int ele_size, unsigned align_unit) { -+ bool is_backwards = ele_size < 0; -+ size_t granularity = uabs(ele_size); -+ for 
(unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { -+ int offset = is_backwards ? (int)(-unit) : unit; -+ Label exit; -+ __ andi(tmp, count_in_bytes, unit); -+ __ beqz(tmp, exit); -+ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); -+ __ bind(exit); ++ if (is_aligned) { ++ __ addi(tmp, cnt, -8); ++ __ bgez(tmp, copy8); ++ __ j(copy_small); + } -+ } + -+ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, -+ int step, Label *Lcopy_small, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ RegSet tmp_regs = RegSet::range(x13, x16); -+ assert_different_registers(src, dst, count_in_bytes, tmp); ++ __ mv(tmp, 16); ++ __ blt(cnt, tmp, copy_small); + -+ Label loop, copy2, copy1, finish; -+ if (loopsize == noreg) { -+ loopsize = t1; -+ __ mv(loopsize, 8 * granularity); -+ } ++ __ xorr(tmp, src, dst); ++ __ andi(tmp, tmp, 0b111); ++ __ bnez(tmp, copy_small); + -+ // Cyclic copy with 8*step. -+ __ bind(loop); -+ { -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); -+ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); -+ __ bge(count_in_bytes, loopsize, loop); ++ __ bind(same_aligned); ++ __ andi(tmp, src, 0b111); ++ __ beqz(tmp, copy8); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); + } -+ -+ if (Lcopy_small != NULL) { -+ __ bind(*Lcopy_small); ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); + } ++ __ addi(cnt, cnt, -granularity); ++ __ beqz(cnt, done); ++ __ j(same_aligned); + -+ // copy memory smaller than step * 8 bytes -+ __ andi(tmp, count_in_bytes, granularity << 2); -+ __ beqz(tmp, copy2); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); -+ -+ __ bind(copy2); -+ __ andi(tmp, count_in_bytes, granularity << 1); -+ __ beqz(tmp, copy1); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); -+ -+ __ bind(copy1); -+ __ andi(tmp, count_in_bytes, granularity); -+ __ beqz(tmp, finish); -+ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); -+ -+ __ bind(finish); -+ } -+ -+ // Cyclic copy with one step. -+ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { -+ size_t granularity = uabs(step); -+ Label loop1; -+ if (loopsize == noreg) { -+ loopsize = t0; -+ __ mv(loopsize, granularity); ++ __ bind(copy8); ++ if (is_backwards) { ++ __ addi(src, src, -wordSize); ++ __ addi(dst, dst, -wordSize); + } -+ -+ __ bind(loop1); -+ { -+ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); -+ __ sub(count_in_bytes, count_in_bytes, granularity); -+ __ bge(count_in_bytes, loopsize, loop1); -+ } -+ } -+ -+ void align_unit(Register src, Register dst, Register count_in_bytes, -+ unsigned unit, bool is_backwards) { -+ Label skip; -+ __ andi(t0, dst, unit); -+ __ beqz(t0, skip); -+ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? 
-unit : unit); -+ __ sub(count_in_bytes, count_in_bytes, unit); -+ __ bind(skip); -+ } -+ -+ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, -+ Register tmp, int ele_step) { -+ -+ bool is_backwards = ele_step < 0; -+ unsigned int granularity = uabs(ele_step); -+ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; -+ const Register count_in_bytes = x31, src = x28, dst = x29; -+ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); -+ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); -+ __ add(src, s, is_backwards ? count_in_bytes : zr); -+ __ add(dst, d, is_backwards ? count_in_bytes : zr); -+ -+ // if count_in_elements < 8, copy_small -+ __ mv(t0, 8); -+ if (is_align && granularity < BytesPerLong) { -+ __ blt(count_in_bytes, t0, Lcopy_small); -+ } else { -+ __ blt(count_in_elements, t0, Lcopy_small); -+ } -+ -+ if (granularity < BytesPerLong) { -+ Label Lcopy_aligned[3]; -+ Label Lalign8; -+ if (!is_align) { -+ Label Lalign_and_copy; -+ __ mv(t0, EagerArrayCopyThreshold); -+ __ blt(count_in_bytes, t0, Lalign_and_copy); -+ // Align dst to 8. -+ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ -+ Register shr = x30, shl = x7, tmp1 = x13; -+ -+ __ andi(shr, src, 0x7); -+ __ beqz(shr, Lalign8); -+ { -+ // calculaute the shift for store doubleword -+ __ slli(shr, shr, 3); -+ __ sub(shl, shr, 64); -+ __ sub(shl, zr, shl); -+ -+ // alsrc: previous position of src octal alignment -+ Register alsrc = t1; -+ __ andi(alsrc, src, -8); -+ -+ // move src to tail -+ __ andi(t0, count_in_bytes, -8); -+ if (is_backwards) { -+ __ sub(src, src, t0); -+ } else { -+ __ add(src, src, t0); -+ } -+ -+ // prepare for copy_dstaligned_loop -+ __ ld(tmp1, alsrc, 0); -+ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); -+ __ mv(x17, 8); -+ __ blt(count_in_bytes, x17, Lcopy_small); -+ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); -+ __ j(Lcopy_small); -+ } -+ __ j(Ldone); -+ __ bind(Lalign_and_copy); -+ -+ // Check src and dst could be 8/4/2 algined at the same time. If could, align the -+ // memory and copy by 8/4/2. -+ __ xorr(t1, src, dst); -+ -+ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { -+ Label skip; -+ unsigned int unit = alignment >> 1; -+ // Check src and dst could be aligned to checkbyte at the same time -+ // if copy from src to dst. If couldn't, jump to label not_aligned. -+ __ andi(t0, t1, alignment - 1); -+ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); -+ // Align src and dst to unit. -+ align_unit(src, dst, count_in_bytes, unit, is_backwards); -+ } -+ } -+ __ bind(Lalign8); -+ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { -+ // Copy memory by steps, which has been aligned to step_size. -+ Label loop8, Ltail; -+ int step = is_backwards ? (-step_size) : step_size; -+ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before -+ // Check whether the memory size is smaller than step_size. -+ __ mv(t0, step_size); -+ __ blt(count_in_bytes, t0, Ltail); -+ } -+ const Register eight_step = t1; -+ __ mv(eight_step, step_size * 8); -+ __ bge(count_in_bytes, eight_step, loop8); -+ // If memory is less than 8*step_size bytes, loop by step. 
-+ copy_loop1(src, dst, count_in_bytes, step, t0); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(loop8); -+ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. -+ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); -+ __ bind(Ltail); -+ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); -+ __ j(Ldone); -+ -+ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); -+ } ++ __ ld(tmp3, Address(src)); ++ __ sd(tmp3, Address(dst)); ++ if (!is_backwards) { ++ __ addi(src, src, wordSize); ++ __ addi(dst, dst, wordSize); + } -+ // If the ele_step is greater than 8, or the memory src and dst cannot -+ // be aligned with a number greater than the value of step. -+ // Cyclic copy with 8*ele_step. -+ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); ++ __ addi(cnt, cnt, -wordSize); ++ __ addi(tmp4, cnt, -8); ++ __ bgez(tmp4, copy8); // cnt >= 8, do next loop + -+ __ bind(Ldone); -+ } ++ __ beqz(cnt, done); + -+ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; -+ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, -+ srl_reg1 = is_backwards ? tmp2 : tmp1, -+ sll_reg2 = is_backwards ? tmp2 : tmp3, -+ srl_reg2 = is_backwards ? tmp3 : tmp2, -+ sll_reg3 = is_backwards ? tmp3 : tmp4, -+ srl_reg3 = is_backwards ? tmp4 : tmp3, -+ sll_reg4 = is_backwards ? tmp4 : tmp1, -+ srl_reg4 = is_backwards ? tmp1 : tmp4; -+ assert_different_registers(t0, thirty_two, alsrc, shr, shl); -+ int unit = is_backwards ? -wordSize : wordSize; -+ int offset = is_backwards ? -wordSize : 0; -+ Label loop, done; -+ -+ __ mv(thirty_two, 32); -+ __ blt(count_in_bytes, thirty_two, done); -+ -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg1, shl); -+ __ srl(tmp1, srl_reg1, shr); -+ __ orr(tmp1, tmp1, t0); -+ __ sd(tmp1, dst, offset); -+ -+ __ ld(tmp3, alsrc, unit * 2); -+ __ sll(t0, sll_reg2, shl); -+ __ srl(tmp2, srl_reg2, shr); -+ __ orr(tmp2, tmp2, t0); -+ __ sd(tmp2, dst, unit + offset); -+ -+ __ ld(tmp4, alsrc, unit * 3); -+ __ sll(t0, sll_reg3, shl); -+ __ srl(tmp3, srl_reg3, shr); -+ __ orr(tmp3, tmp3, t0); -+ __ sd(tmp3, dst, unit * 2 + offset); -+ -+ __ ld(tmp1, alsrc, unit * 4); -+ __ sll(t0, sll_reg4, shl); -+ __ srl(tmp4, srl_reg4, shr); -+ __ orr(tmp4, tmp4, t0); -+ __ sd(tmp4, dst, unit * 3 + offset); -+ -+ __ add(alsrc, alsrc, unit * 4); -+ __ add(dst, dst, unit * 4); -+ __ sub(count_in_bytes, count_in_bytes, 32); -+ __ bge(count_in_bytes, thirty_two, loop); ++ __ bind(copy_small); ++ if (is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ (_masm->*ld_arr)(tmp3, Address(src), t0); ++ (_masm->*st_arr)(tmp3, Address(dst), t0); ++ if (!is_backwards) { ++ __ addi(src, src, step); ++ __ addi(dst, dst, step); ++ } ++ __ addi(cnt, cnt, -granularity); ++ __ bgtz(cnt, copy_small); + + __ bind(done); + } + -+ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, -+ Register shr, Register shl, -+ Register count_in_bytes, Register eight, -+ bool is_backwards) { -+ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; -+ const Register sll_reg = is_backwards ? tmp1 : tmp2, -+ srl_reg = is_backwards ? tmp2 : tmp1; -+ assert_different_registers(t0, eight, alsrc, shr, shl); -+ Label loop; -+ int unit = is_backwards ? 
-wordSize : wordSize; -+ -+ __ bind(loop); -+ __ ld(tmp2, alsrc, unit); -+ __ sll(t0, sll_reg, shl); -+ __ srl(tmp1, srl_reg, shr); -+ __ orr(t0, tmp1, t0); -+ __ sd(t0, dst, is_backwards ? unit : 0); -+ __ mv(tmp1, tmp2); -+ __ add(alsrc, alsrc, unit); -+ __ add(dst, dst, unit); -+ __ sub(count_in_bytes, count_in_bytes, 8); -+ __ bge(count_in_bytes, eight, loop); -+ } -+ + // Scan over array at a for count oops, verifying each one. + // Preserves a and count, clobbers t0 and t1. -+ void verify_oop_array(int size, Register a, Register count, Register temp) { ++ void verify_oop_array(size_t size, Register a, Register count, Register temp) { + Label loop, end; + __ mv(t1, zr); + __ slli(t0, count, exact_log2(size)); @@ -44374,7 +42125,7 @@ index 000000000..c5b3b094c + __ bgeu(t1, t0, end); + + __ add(temp, a, t1); -+ if (size == wordSize) { ++ if (size == (size_t)wordSize) { + __ ld(temp, Address(temp, 0)); + __ verify_oop(temp); + } else { @@ -44399,7 +42150,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44435,16 +42186,17 @@ index 000000000..c5b3b094c + // save regs before copy_memory + __ push_reg(RegSet::of(d, count), sp); + } -+ copy_memory(aligned, s, d, count, t0, checked_cast(size)); ++ ++ copy_memory(aligned, s, d, count, t0, size); + + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { -+ verify_oop_array(checked_cast(size), d, count, t2); ++ verify_oop_array(size, d, count, t2); + } + } + -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + + __ leave(); + __ mv(x10, zr); // return 0 @@ -44465,9 +42217,9 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // -+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, ++ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address* entry, const char* name, + bool dest_uninitialized = false) { + const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; @@ -44504,13 +42256,14 @@ index 000000000..c5b3b094c + } + + copy_memory(aligned, s, d, count, t0, -size); ++ + if (is_oop) { + __ pop_reg(RegSet::of(d, count), sp); + if (VerifyOops) { + verify_oop_array(size, d, count, t2); + } + } -+ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, RegSet()); + __ leave(); + __ mv(x10, zr); // return 0 + __ ret(); @@ -44627,7 +42380,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. 
+ // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point @@ -44651,7 +42404,7 @@ index 000000000..c5b3b094c + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span -+ // cache line boundaries will still be loaded and stored atomically. ++ // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_copy(bool aligned, address nooverlap_target, + address* entry, const char* name, @@ -44715,7 +42468,7 @@ index 000000000..c5b3b094c + address generate_disjoint_oop_copy(bool aligned, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); + } + @@ -44733,7 +42486,7 @@ index 000000000..c5b3b094c + address nooverlap_target, address* entry, + const char* name, bool dest_uninitialized) { + const bool is_oop = true; -+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, + name, dest_uninitialized); + } @@ -44807,7 +42560,7 @@ index 000000000..c5b3b094c + + __ enter(); // required for proper stackwalking of RuntimeStub frame + -+ // Caller of this entry point must set up the argument registers ++ // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + BLOCK_COMMENT("Entry:"); @@ -44859,7 +42612,7 @@ index 000000000..c5b3b094c + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); -+ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, AS_RAW); // store the oop + __ add(to, to, UseCompressedOops ? 
4 : 8); + __ sub(count, count, 1); + __ beqz(count, L_do_card_marks); @@ -45015,13 +42768,13 @@ index 000000000..c5b3b094c + const Register dst_pos = c_rarg3; // destination position + const Register length = c_rarg4; + ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; ++ + __ align(CodeEntryAlignment); + + StubCodeMark mark(this, "StubRoutines", name); + -+ // Registers used as temps -+ const Register dst_klass = c_rarg5; -+ + address start = __ pc(); + + __ enter(); // required for proper stackwalking of RuntimeStub frame @@ -45265,7 +43018,7 @@ index 000000000..c5b3b094c + } + + __ BIND(L_failed); -+ __ mv(x10, -1); ++ __ li(x10, -1); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + @@ -45456,6 +43209,9 @@ index 000000000..c5b3b094c + address entry_jlong_arraycopy = NULL; + address entry_checkcast_arraycopy = NULL; + ++ generate_copy_longs(copy_f, c_rarg0, c_rarg1, t1, copy_forwards); ++ generate_copy_longs(copy_b, c_rarg0, c_rarg1, t1, copy_backwards); ++ + StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); + + //*** jbyte @@ -45598,42 +43354,42 @@ index 000000000..c5b3b094c + __ ret(); + } + -+#ifdef COMPILER2 + // code for comparing 16 bytes of strings with same encoding -+ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { ++ void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; + __ ld(tmp5, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp1, tmp2); + __ ld(cnt1, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF1); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ xorr(tmp4, tmp5, cnt1); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); ++ __ addi(str2, str2, 8); + __ bnez(tmp4, DIFF2); + } + + // code for comparing 8 characters of strings with Latin1 and Utf16 encoding -+ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { -+ const Register tmp = x30; ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Label &DIFF1, ++ Label &DIFF2) { ++ const Register strU = x12, curU = x7, strL = x29, tmp = x30; + __ ld(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize); ++ __ addi(strL, strL, 8); + __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ addi(strU, strU, 8); + __ inflate_lo32(tmp, tmpL); + __ mv(t0, tmp); -+ __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ xorr(tmp, curU, t0); ++ __ bnez(tmp, DIFF2); + -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); ++ __ ld(curU, Address(strU)); ++ __ addi(strU, strU, 8); + __ inflate_hi32(tmp, tmpL); + __ mv(t0, tmp); + __ xorr(tmp, tmpU, t0); -+ __ bnez(tmp, DIFF); ++ __ bnez(tmp, DIFF1); + } + + // x10 = result @@ -45648,7 +43404,8 @@ index 000000000..c5b3b094c + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", isLU ? 
"compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); + address entry = __ pc(); -+ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; ++ Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2, ++ DONE, CALCULATE_DIFFERENCE; + const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, + tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; + RegSet spilled_regs = RegSet::of(tmp4, tmp5); @@ -45659,9 +43416,16 @@ index 000000000..c5b3b094c + __ mv(isLU ? tmp1 : tmp2, tmp3); + __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); + __ addi(str2, str2, isLU ? wordSize : wordSize / 2); -+ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. ++ __ sub(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case. + __ push_reg(spilled_regs, sp); + ++ if (isLU) { ++ __ add(str1, str1, cnt2); ++ __ shadd(str2, cnt2, str2, t0, 1); ++ } else { ++ __ shadd(str1, cnt2, str1, t0, 1); ++ __ add(str2, str2, cnt2); ++ } + __ xorr(tmp3, tmp1, tmp2); + __ mv(tmp5, tmp2); + __ bnez(tmp3, CALCULATE_DIFFERENCE); @@ -45671,71 +43435,47 @@ index 000000000..c5b3b094c + tmpU = isLU ? tmp5 : tmp1, // where to keep U for comparison + tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison + -+ // make sure main loop is byte-aligned, we should load another 4 bytes from strL -+ __ beqz(cnt2, DONE); // no characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, -wordSize / 2); ++ __ sub(tmp2, strL, cnt2); // strL pointer to load from ++ __ slli(t0, cnt2, 1); ++ __ sub(cnt1, strU, t0); // strU pointer to load from + -+ __ beqz(cnt2, DONE); // no character left -+ __ sub(cnt2, cnt2, wordSize * 2); ++ __ ld(tmp4, Address(cnt1)); ++ __ addi(cnt1, cnt1, 8); ++ __ beqz(cnt2, LOAD_LAST); // no characters left except last load ++ __ sub(cnt2, cnt2, 16); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); // smaller loop -+ __ sub(cnt2, cnt2, wordSize * 2); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ sub(cnt2, cnt2, 16); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); + __ bgez(cnt2, SMALL_LOOP); -+ __ addi(t0, cnt2, wordSize * 2); -+ __ beqz(t0, DONE); -+ __ bind(TAIL); // 1..15 characters left -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. 
-+ Label LOAD_LAST, WORD_CMP; -+ __ addi(t0, cnt2, wordSize); -+ __ bgtz(t0, LOAD_LAST); -+ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ addi(cnt2, cnt2, wordSize); -+ __ beqz(cnt2, DONE); // no character left -+ __ bind(LOAD_LAST); // 1..7 characters left -+ __ lwu(tmpL, Address(strL)); -+ __ addi(strL, strL, wordSize / 2); -+ __ ld(tmpU, Address(strU)); -+ __ addi(strU, strU, wordSize); -+ __ inflate_lo32(tmp3, tmpL); -+ __ mv(tmpL, tmp3); -+ __ addi(t0, cnt2, wordSize / 2); -+ __ blez(t0, WORD_CMP); -+ __ slli(t0, t0, 1); // now in bytes -+ __ slli(t0, t0, LogBitsPerByte); -+ __ sll(tmpL, tmpL, t0); -+ __ sll(tmpU, tmpU, t0); -+ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison -+ __ bind(WORD_CMP); -+ __ xorr(tmp3, tmpU, tmpL); -+ __ bnez(tmp3, CALCULATE_DIFFERENCE); -+ __ addi(cnt2, cnt2, wordSize / 2); -+ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left -+ __ j(DONE); // no character left -+ } else { -+ // Unaligned accesses. Load from non-byte aligned address. -+ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string -+ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string -+ // last 16 characters -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); -+ __ j(DONE); -+ } -+ __ bind(DIFF); ++ __ addi(t0, cnt2, 16); ++ __ beqz(t0, LOAD_LAST); ++ __ bind(TAIL); // 1..15 characters left until last load (last 4 characters) ++ // Address of 8 bytes before last 4 characters in UTF-16 string ++ __ shadd(cnt1, cnt2, cnt1, t0, 1); ++ // Address of 16 bytes before last 4 characters in Latin1 string ++ __ add(tmp2, tmp2, cnt2); ++ __ ld(tmp4, Address(cnt1, -8)); ++ // last 16 characters before last load ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ compare_string_8_x_LU(tmpL, tmpU, DIFF1, DIFF2); ++ __ j(LOAD_LAST); ++ __ bind(DIFF2); ++ __ mv(tmpU, tmp4); ++ __ bind(DIFF1); + __ mv(tmpL, t0); ++ __ j(CALCULATE_DIFFERENCE); ++ __ bind(LOAD_LAST); ++ // Last 4 UTF-16 characters are already pre-loaded into tmp4 by compare_string_8_x_LU. ++ // No need to load it again ++ __ mv(tmpU, tmp4); ++ __ ld(tmpL, Address(strL)); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ beqz(tmp3, DONE); ++ + // Find the first different characters in the longwords and + // compute their difference. + __ bind(CALCULATE_DIFFERENCE); @@ -45778,49 +43518,35 @@ index 000000000..c5b3b094c + __ add(str1, str1, wordSize); + __ add(str2, str2, wordSize); + // less than 16 bytes left? -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ push_reg(spilled_regs, sp); + __ bltz(cnt2, TAIL); + __ bind(SMALL_LOOP); + compare_string_16_bytes_same(DIFF, DIFF2); -+ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ sub(cnt2, cnt2, isLL ? 16 : 8); + __ bgez(cnt2, SMALL_LOOP); + __ bind(TAIL); -+ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ addi(cnt2, cnt2, isLL ? 16 : 8); + __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, isLL ? 
8 : 4); + __ blez(cnt2, CHECK_LAST); + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); + __ ld(tmp1, Address(str1)); -+ __ addi(str1, str1, wordSize); ++ __ addi(str1, str1, 8); + __ ld(tmp2, Address(str2)); -+ __ addi(str2, str2, wordSize); -+ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ addi(str2, str2, 8); ++ __ sub(cnt2, cnt2, isLL ? 8 : 4); + __ bind(CHECK_LAST); + if (!isLL) { + __ add(cnt2, cnt2, cnt2); // now in bytes + } + __ xorr(tmp4, tmp1, tmp2); + __ bnez(tmp4, DIFF); -+ if (AvoidUnalignedAccesses) { -+ // Aligned access. Load bytes from byte-aligned address, -+ // which may contain invalid bytes in last load. -+ // Invalid bytes should be removed before comparison. -+ __ ld(tmp5, Address(str1)); -+ __ ld(cnt1, Address(str2)); -+ __ neg(cnt2, cnt2); -+ __ slli(cnt2, cnt2, LogBitsPerByte); -+ __ sll(tmp5, tmp5, cnt2); -+ __ sll(cnt1, cnt1, cnt2); -+ } else { -+ // Unaligned access. Load from non-byte aligned address. -+ __ add(str1, str1, cnt2); -+ __ ld(tmp5, Address(str1)); -+ __ add(str2, str2, cnt2); -+ __ ld(cnt1, Address(str2)); -+ } -+ ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); + __ xorr(tmp4, tmp5, cnt1); + __ beqz(tmp4, LENGTH_DIFF); + // Find the first different characters in the longwords and @@ -45889,7 +43615,7 @@ index 000000000..c5b3b094c + // parameters + Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; + // temporary registers -+ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zeros = x23, mask2 = x24, tmp = x25; + // redefinitions + Register ch1 = x28, ch2 = x29; + RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); @@ -45910,9 +43636,13 @@ index 000000000..c5b3b094c + + // first is needle[0] + __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); -+ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ uint64_t mask0101 = UCONST64(0x0101010101010101); ++ uint64_t mask0001 = UCONST64(0x0001000100010001); ++ __ mv(mask1, haystack_isL ? mask0101 : mask0001); + __ mul(first, first, mask1); -+ __ mv(mask2, haystack_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ uint64_t mask7f7f = UCONST64(0x7f7f7f7f7f7f7f7f); ++ uint64_t mask7fff = UCONST64(0x7fff7fff7fff7fff); ++ __ mv(mask2, haystack_isL ? 
mask7f7f : mask7fff); + if (needle_isL != haystack_isL) { + __ mv(tmp, ch1); + } @@ -45920,7 +43650,7 @@ index 000000000..c5b3b094c + __ blez(haystack_len, L_SMALL); + + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + // xorr, sub, orr, notr, andr + // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] @@ -45957,7 +43687,7 @@ index 000000000..c5b3b094c + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + __ j(L_SMALL_PROCEED); + + __ align(OptoLoopAlignment); @@ -45965,42 +43695,42 @@ index 000000000..c5b3b094c + __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); + __ neg(haystack_len, haystack_len); + if (needle_isL != haystack_isL) { -+ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zeros); + } + __ xorr(ch2, first, ch2); + __ sub(match_mask, ch2, mask1); + __ orr(ch2, ch2, mask2); -+ __ mv(trailing_zero, -1); // all bits set ++ __ mv(trailing_zeros, -1); // all bits set + + __ bind(L_SMALL_PROCEED); -+ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. ++ __ srl(trailing_zeros, trailing_zeros, haystack_len); // mask. zeroes on useless bits. + __ notr(ch2, ch2); + __ andr(match_mask, match_mask, ch2); -+ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check ++ __ andr(match_mask, match_mask, trailing_zeros); // clear useless bits and check + __ beqz(match_mask, NOMATCH); + + __ bind(L_SMALL_HAS_ZERO_LOOP); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ mv(ch2, wordSize / haystack_chr_size); + __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + + __ bind(L_SMALL_CMP_LOOP); -+ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(first, trailing_zeros, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); -+ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ add(trailing_zeros, trailing_zeros, 1); ++ __ bge(trailing_zeros, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); + __ beq(first, ch2, L_SMALL_CMP_LOOP); + + __ bind(L_SMALL_CMP_LOOP_NOMATCH); + __ beqz(match_mask, NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ add(result, result, 1); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_SMALL_HAS_ZERO_LOOP); @@ -46012,14 +43742,14 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); + __ j(DONE); + + __ align(OptoLoopAlignment); + __ bind(L_HAS_ZERO); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 7 : 15); + __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); + __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) + __ sub(result, result, 1); // array index from 0, so result -= 1 @@ -46029,26 +43759,26 @@ index 000000000..c5b3b094c + __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); + __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); + // load next 8 bytes from haystack, and increase result index -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); -+ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ mv(trailing_zeros, wordSize / haystack_chr_size); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + + // compare one char + __ bind(L_CMP_LOOP); -+ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); ++ __ shadd(needle_len, trailing_zeros, needle, needle_len, needle_chr_shift); + needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); -+ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ __ shadd(ch2, trailing_zeros, haystack, ch2, haystack_chr_shift); + haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); -+ __ add(trailing_zero, trailing_zero, 1); // next char index ++ __ add(trailing_zeros, trailing_zeros, 1); // next char index + __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); -+ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); ++ __ bge(trailing_zeros, tmp, L_CMP_LOOP_LAST_CMP); + __ beq(needle_len, ch2, L_CMP_LOOP); + + __ bind(L_CMP_LOOP_NOMATCH); + __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); -+ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index -+ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ ctzc_bit(trailing_zeros, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zeros, trailing_zeros, haystack_isL ? 
7 : 15); + __ add(haystack, haystack, haystack_chr_size); + __ j(L_HAS_ZERO_LOOP); + @@ -46059,7 +43789,7 @@ index 000000000..c5b3b094c + + __ align(OptoLoopAlignment); + __ bind(L_CMP_LOOP_LAST_CMP2); -+ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ compute_index(haystack, trailing_zeros, match_mask, result, ch2, tmp, haystack_isL); + __ add(result, result, 1); + __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); + __ j(DONE); @@ -46101,28 +43831,28 @@ index 000000000..c5b3b094c + StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); + } + ++#ifdef COMPILER2 + address generate_mulAdd() + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register out = x10; + const Register in = x11; + const Register offset = x12; + const Register len = x13; + const Register k = x14; -+ const Register tmp1 = x28; -+ const Register tmp2 = x29; ++ const Register tmp = x28; + + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ mul_add(out, in, offset, len, k, tmp1, tmp2); ++ __ mul_add(out, in, offset, len, k, tmp); + __ leave(); + __ ret(); + -+ return start; ++ return entry; + } + + /** @@ -46132,7 +43862,7 @@ index 000000000..c5b3b094c + * c_rarg0 - x address + * c_rarg1 - x length + * c_rarg2 - y address -+ * c_rarg3 - y lenth ++ * c_rarg3 - y length + * c_rarg4 - z address + * c_rarg5 - z length + */ @@ -46140,7 +43870,7 @@ index 000000000..c5b3b094c + { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46157,26 +43887,20 @@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(tmp1, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame -+ __ push_reg(spilled_regs, sp); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(); + -+ return start; ++ return entry; + } + + address generate_squareToLen() + { -+ // squareToLen algorithm for sizes 1..127 described in java code works -+ // faster than multiply_to_len on some CPUs and slower on others, but -+ // multiply_to_len shows a bit better overall results + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "squareToLen"); -+ address start = __ pc(); ++ address entry = __ pc(); + + const Register x = x10; + const Register xlen = x11; @@ -46193,131 +43917,17 @@ index 000000000..c5b3b094c + const Register tmp6 = x30; + const Register tmp7 = x31; + -+ RegSet spilled_regs = RegSet::of(y, tmp2); + BLOCK_COMMENT("Entry:"); + __ enter(); -+ __ push_reg(spilled_regs, sp); + __ mv(y, x); + __ mv(ylen, xlen); + __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); -+ __ pop_reg(spilled_regs, sp); + __ leave(); + __ ret(); + -+ return start; -+ } -+#endif // COMPILER2 -+ -+ // Continuation point for throwing of implicit exceptions that are -+ // not handled in the current activation. Fabricates an exception -+ // oop and initiates normal exception dispatching in this -+ // frame. 
Since we need to preserve callee-saved values (currently -+ // only for C2, but done for C1 as well) we need a callee-saved oop -+ // map and therefore have to make these stubs into RuntimeStubs -+ // rather than BufferBlobs. If the compiler needs all registers to -+ // be preserved between the fault point and the exception handler -+ // then it must assume responsibility for that in -+ // AbstractCompiler::continuation_for_implicit_null_exception or -+ // continuation_for_implicit_division_by_zero_exception. All other -+ // implicit exceptions (e.g., NullPointerException or -+ // AbstractMethodError on entry) are either at call sites or -+ // otherwise assume that stack unwinding will be initiated, so -+ // caller saved registers were assumed volatile in the compiler. -+ -+#undef __ -+#define __ masm-> -+ -+ address generate_throw_exception(const char* name, -+ address runtime_entry, -+ Register arg1 = noreg, -+ Register arg2 = noreg) { -+ // Information about frame layout at time of blocking runtime call. -+ // Note that we only have to preserve callee-saved registers since -+ // the compilers are responsible for supplying a continuation point -+ // if they expect all registers to be preserved. -+ // n.b. riscv asserts that frame::arg_reg_save_area_bytes == 0 -+ assert_cond(runtime_entry != NULL); -+ enum layout { -+ fp_off = 0, -+ fp_off2, -+ return_off, -+ return_off2, -+ framesize // inclusive of return address -+ }; -+ -+ const int insts_size = 512; -+ const int locs_size = 64; -+ -+ CodeBuffer code(name, insts_size, locs_size); -+ OopMapSet* oop_maps = new OopMapSet(); -+ MacroAssembler* masm = new MacroAssembler(&code); -+ assert_cond(oop_maps != NULL && masm != NULL); -+ -+ address start = __ pc(); -+ -+ // This is an inlined and slightly modified version of call_VM -+ // which has the ability to fetch the return PC out of -+ // thread-local storage and also sets up last_Java_sp slightly -+ // differently than the real call_VM -+ -+ __ enter(); // Save FP and RA before call -+ -+ assert(is_even(framesize / 2), "sp not 16-byte aligned"); -+ -+ // ra and fp are already in place -+ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog -+ -+ int frame_complete = __ pc() - start; -+ -+ // Set up last_Java_sp and last_Java_fp -+ address the_pc = __ pc(); -+ __ set_last_Java_frame(sp, fp, the_pc, t0); -+ -+ // Call runtime -+ if (arg1 != noreg) { -+ assert(arg2 != c_rarg1, "clobbered"); -+ __ mv(c_rarg1, arg1); -+ } -+ if (arg2 != noreg) { -+ __ mv(c_rarg2, arg2); -+ } -+ __ mv(c_rarg0, xthread); -+ BLOCK_COMMENT("call runtime_entry"); -+ int32_t offset = 0; -+ __ movptr_with_offset(t0, runtime_entry, offset); -+ __ jalr(x1, t0, offset); -+ -+ // Generate oop map -+ OopMap* map = new OopMap(framesize, 0); -+ assert_cond(map != NULL); -+ -+ oop_maps->add_gc_map(the_pc - start, map); -+ -+ __ reset_last_Java_frame(true); -+ -+ __ leave(); -+ -+ // check for pending exceptions -+#ifdef ASSERT -+ Label L; -+ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); -+ __ bnez(t0, L); -+ __ should_not_reach_here(); -+ __ bind(L); -+#endif // ASSERT -+ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); -+ -+ -+ // codeBlob framesize is in words (not VMRegImpl::slot_size) -+ RuntimeStub* stub = -+ RuntimeStub::new_runtime_stub(name, -+ &code, -+ frame_complete, -+ (framesize >> (LogBytesPerWord - LogBytesPerInt)), -+ oop_maps, false); -+ assert(stub != NULL, "create runtime stub fail!"); -+ return stub->entry_point(); ++ return entry; + } ++#endif + 
+#ifdef COMPILER2 + class MontgomeryMultiplyGenerator : public MacroAssembler { @@ -46534,10 +44144,12 @@ index 000000000..c5b3b094c + // Rj == i-len + + // Ra as temp register -+ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); -+ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); -+ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); -+ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ slli(Ra, Rj, LogBytesPerWord); ++ add(Pa, Pa_base, Ra); ++ add(Pm, Pm_base, Ra); ++ slli(Ra, len, LogBytesPerWord); ++ add(Pb, Pb_base, Ra); ++ add(Pn, Pn_base, Ra); + + // Ra = *++Pa; + // Rb = *--Pb; @@ -46566,7 +44178,8 @@ index 000000000..c5b3b094c + // store it. + // Pm_base[i-len] = tmp0; + // Rj as temp register -+ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); ++ slli(Rj, Rj, LogBytesPerWord); ++ add(Rj, Pm_base, Rj); + sd(tmp0, Address(Rj)); + + // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; @@ -46593,14 +44206,15 @@ index 000000000..c5b3b094c + ld(Rm, Address(Rm)); + add(Rn, Pn_base, Rn); + ld(Rn, Address(Rn)); -+ mv(t0, 1); // set carry flag, i.e. no borrow ++ li(t0, 1); // set carry flag, i.e. no borrow + align(16); + bind(loop); { + notr(Rn, Rn); + add(Rm, Rm, t0); + add(Rm, Rm, Rn); + sltu(t0, Rm, Rn); -+ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register ++ slli(Rn, i, LogBytesPerWord); // Rn as temp register ++ add(Rn, Pm_base, Rn); + sd(Rm, Address(Rn)); + add(i, i, 1); + slli(Rn, i, LogBytesPerWord); @@ -46624,7 +44238,8 @@ index 000000000..c5b3b094c + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < x28 && tmp2 < x28, "register corruption"); + -+ shadd(s, len, s, tmp1, LogBytesPerWord); ++ slli(tmp1, len, LogBytesPerWord); ++ add(s, s, tmp1); + mv(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli(tmp1, len, LogBytesPerWord); @@ -46673,7 +44288,6 @@ index 000000000..c5b3b094c + ld(Rn, Address(Pn)); + } + -+ + void post1_squaring() { + acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n + @@ -46704,7 +44318,7 @@ index 000000000..c5b3b094c + // the carry flag iff tmp0 is nonzero. + // + // mul(Rlo_mn, Rm, Rn); -+ // cad(zr, tmp0, Rlo_mn); ++ // cad(zr, tmp, Rlo_mn); + addi(t0, tmp0, -1); + sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero + cadc(tmp0, tmp1, Rhi_mn, t0); @@ -46757,7 +44371,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. -+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46783,7 +44397,7 @@ index 000000000..c5b3b094c + { + ld(Rn, Address(Pn_base)); + mul(Rlo_mn, Rn, inv); -+ mv(t0, -1); ++ li(t0, -1); + Label ok; + beq(Rlo_mn, t0, ok); + stop("broken inverse in Montgomery multiply"); @@ -46820,8 +44434,8 @@ index 000000000..c5b3b094c + block_comment("for (int i = len; i < 2*len; i++) {"); + mv(Ri, Rlen); { + Label loop, end; -+ slli(Rj, Rlen, 1); // Rj as temp register -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + bind(loop); + pre2(Ri, Rlen); @@ -46835,13 +44449,12 @@ index 000000000..c5b3b094c + + post2(Ri, Rlen); + addw(Ri, Ri, 1); -+ slli(Rj, Rlen, 1); -+ blt(Ri, Rj, loop); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); + bind(end); + } + block_comment("} // i"); + -+ + normalize(Rlen); + + mv(Ra, Pm_base); // Save Pm_base in Ra @@ -46880,7 +44493,7 @@ index 000000000..c5b3b094c + enter(); + + // Make room. 
-+ mv(Ra, 512); ++ li(Ra, 512); + bgt(Rlen, Ra, argh); + slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); + sub(Ra, sp, Ra); @@ -46938,8 +44551,8 @@ index 000000000..c5b3b094c + mv(Ri, Rlen); { + Label loop, end; + bind(loop); -+ slli(Rj, Rlen, 1); -+ bge(Ri, Rj, end); ++ slli(t0, Rlen, 1); ++ bge(Ri, t0, end); + + pre2(Ri, Rlen); + @@ -46985,6 +44598,117 @@ index 000000000..c5b3b094c + }; +#endif // COMPILER2 + ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ const int insts_size = 512; ++ const int locs_size = 64; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ ++ __ enter(); // Save FP and RA before call ++ ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); ++ ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); ++ } ++ + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points @@ -47012,6 +44736,13 @@ index 000000000..c5b3b094c + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); ++ // Safefetch stubs. 
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); + } + + void generate_all() { @@ -47050,10 +44781,6 @@ index 000000000..c5b3b094c + StubRoutines::_squareToLen = generate_squareToLen(); + } + -+ generate_compare_long_strings(); -+ -+ generate_string_indexof_stubs(); -+ + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); @@ -47065,14 +44792,11 @@ index 000000000..c5b3b094c + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } -+#endif // COMPILER2 -+ // Safefetch stubs. -+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, -+ &StubRoutines::_safefetch32_fault_pc, -+ &StubRoutines::_safefetch32_continuation_pc); -+ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, -+ &StubRoutines::_safefetchN_fault_pc, -+ &StubRoutines::_safefetchN_continuation_pc); ++#endif ++ ++ generate_compare_long_strings(); ++ ++ generate_string_indexof_stubs(); + + StubRoutines::riscv::set_completed(); + } @@ -47094,12 +44818,12 @@ index 000000000..c5b3b094c +} diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp new file mode 100644 -index 000000000..633108b95 +index 0000000000..9202d9ec4b --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,57 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47134,7 +44858,6 @@ index 000000000..633108b95 +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + -+address StubRoutines::riscv::_get_previous_fp_entry = NULL; +address StubRoutines::riscv::_get_previous_sp_entry = NULL; + +address StubRoutines::riscv::_f2i_fixup = NULL; @@ -47146,8 +44869,6 @@ index 000000000..633108b95 +address StubRoutines::riscv::_double_sign_mask = NULL; +address StubRoutines::riscv::_double_sign_flip = NULL; +address StubRoutines::riscv::_zero_blocks = NULL; -+address StubRoutines::riscv::_has_negatives = NULL; -+address StubRoutines::riscv::_has_negatives_long = NULL; +address StubRoutines::riscv::_compare_long_string_LL = NULL; +address StubRoutines::riscv::_compare_long_string_UU = NULL; +address StubRoutines::riscv::_compare_long_string_LU = NULL; @@ -47160,12 +44881,12 @@ index 000000000..633108b95 +bool StubRoutines::riscv::_completed = false; diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp new file mode 100644 -index 000000000..8aa81980e +index 0000000000..0c9445e18a --- /dev/null +++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp -@@ -0,0 +1,179 @@ +@@ -0,0 +1,155 @@ +/* -+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -47197,20 +44918,19 @@ index 000000000..8aa81980e +// definition. See stubRoutines.hpp for a description on how to +// extend it. + -+static bool returns_to_call_stub(address return_pc) { ++static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) -+ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) ++ code_size2 = 28000 // simply increase if too small (assembler will crash if too small) +}; + +class riscv { + friend class StubGenerator; + + private: -+ static address _get_previous_fp_entry; + static address _get_previous_sp_entry; + + static address _f2i_fixup; @@ -47225,8 +44945,6 @@ index 000000000..8aa81980e + + static address _zero_blocks; + -+ static address _has_negatives; -+ static address _has_negatives_long; + static address _compare_long_string_LL; + static address _compare_long_string_LU; + static address _compare_long_string_UL; @@ -47235,57 +44953,44 @@ index 000000000..8aa81980e + static address _string_indexof_linear_uu; + static address _string_indexof_linear_ul; + static address _large_byte_array_inflate; ++ + static bool _completed; + + public: + -+ static address get_previous_fp_entry() -+ { -+ return _get_previous_fp_entry; -+ } -+ -+ static address get_previous_sp_entry() -+ { ++ static address get_previous_sp_entry() { + return _get_previous_sp_entry; + } + -+ static address f2i_fixup() -+ { ++ static address f2i_fixup() { + return _f2i_fixup; + } + -+ static address f2l_fixup() -+ { ++ static address f2l_fixup() { + return _f2l_fixup; + } + -+ static address d2i_fixup() -+ { ++ static address d2i_fixup() { + return _d2i_fixup; + } + -+ static address d2l_fixup() -+ { ++ static address d2l_fixup() { + return _d2l_fixup; + } + -+ static address float_sign_mask() -+ { ++ static address float_sign_mask() { + return _float_sign_mask; + } + -+ static address float_sign_flip() -+ { ++ static address float_sign_flip() { + return _float_sign_flip; + } + -+ static address double_sign_mask() -+ { ++ static address double_sign_mask() { + return _double_sign_mask; + } + -+ static address double_sign_flip() -+ { ++ static address double_sign_flip() { + return _double_sign_flip; + } + @@ -47293,14 +44998,6 @@ index 000000000..8aa81980e + return _zero_blocks; + } + -+ static address has_negatives() { -+ return _has_negatives; -+ } -+ -+ static address has_negatives_long() { -+ return _has_negatives_long; -+ } -+ + static address compare_long_string_LL() { + return _compare_long_string_LL; + } @@ -47345,14 +45042,14 @@ index 000000000..8aa81980e +#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp new file mode 100644 -index 000000000..f5e212204 +index 0000000000..e639fa7e12 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp -@@ -0,0 +1,1841 @@ +@@ -0,0 +1,1833 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. 
-+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -47377,7 +45074,6 @@ index 000000000..f5e212204 + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" -+#include "classfile/javaClasses.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/bytecodeTracer.hpp" @@ -47396,13 +45092,13 @@ index 000000000..f5e212204 +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" ++#include "runtime/jniHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" -+#include "utilities/macros.hpp" +#include + +#ifndef PRODUCT @@ -47453,7 +45149,7 @@ index 000000000..f5e212204 + // bcp (NULL) + // ... + -+ // Restore RA ++ // Restore ra + __ ld(ra, Address(sp, 0)); + __ addi(sp, sp , 2 * wordSize); + @@ -47892,7 +45588,7 @@ index 000000000..f5e212204 + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); -+ __ membar(MacroAssembler::AnyAny); ++ __ fence(0xf, 0xf); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} @@ -47908,9 +45604,10 @@ index 000000000..f5e212204 +// +// xmethod: method +// -+void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, -+ Label* profile_method, -+ Label* profile_method_continue) { ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. 
+ if (TieredCompilation) { @@ -47922,7 +45619,7 @@ index 000000000..f5e212204 + __ beqz(x10, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + -+ in_bytes(InvocationCounter::counter_offset())); ++ in_bytes(InvocationCounter::counter_offset())); + const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); + __ j(done); @@ -47930,19 +45627,19 @@ index 000000000..f5e212204 + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); + __ get_method_counters(xmethod, t1, done); + const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); + __ bind(done); + } else { // not TieredCompilation + const Address backedge_counter(t1, -+ MethodCounters::backedge_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); + const Address invocation_counter(t1, -+ MethodCounters::invocation_counter_offset() + -+ InvocationCounter::counter_offset()); ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); + + __ get_method_counters(xmethod, t1, done); + @@ -47978,7 +45675,7 @@ index 000000000..f5e212204 + __ ld(t1, Address(xmethod, Method::method_counters_offset())); + __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); + __ bltu(x10, t1, done); -+ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here ++ __ j(*overflow); + } + __ bind(done); + } @@ -48168,16 +45865,19 @@ index 000000000..f5e212204 +#if INCLUDE_SHENANDOAHGC + if (UseShenandoahGC) { + __ load_mirror(x28, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); + __ sd(x28, Address(sp, 4 * wordSize)); + } else +#endif + { -+ __ load_mirror(t0, xmethod); -+ __ sd(t0, Address(sp, 4 * wordSize)); ++ __ load_mirror(t2, xmethod); ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ __ sd(t2, Address(sp, 4 * wordSize)); + } -+ __ sd(zr, Address(sp, 5 * wordSize)); + -+ __ load_constant_pool_cache(xcpool, xmethod); ++ __ ld(xcpool, Address(xmethod, Method::const_offset())); ++ __ ld(xcpool, Address(xcpool, ConstMethod::constants_offset())); ++ __ ld(xcpool, Address(xcpool, ConstantPool::cache_offset_in_bytes())); + __ sd(xcpool, Address(sp, 3 * wordSize)); + __ sd(xlocals, Address(sp, 2 * wordSize)); + @@ -48192,7 +45892,8 @@ index 000000000..f5e212204 + + // Move SP out of the way + if (!native_call) { -+ __ load_max_stack(t0, xmethod); ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); + __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); + __ slli(t0, t0, 3); + __ sub(t0, sp, t0); @@ -48237,7 +45938,7 @@ index 000000000..f5e212204 + // xmethod: Method* + // x30: senderSP must preserve for slow path, set SP to it on fast path + -+ // RA is live. It must be saved around calls. ++ // ra is live. It must be saved around calls. + + address entry = __ pc(); + @@ -48303,14 +46004,12 @@ index 000000000..f5e212204 + // Bang each page in the shadow zone. 
We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. -+ if (UseStackBanging) { -+ const int n_shadow_pages = checked_cast(JavaThread::stack_shadow_zone_size()) / os::vm_page_size(); -+ const int start_page = native_call ? n_shadow_pages : 1; -+ const int page_size = os::vm_page_size(); -+ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { -+ __ sub(t1, sp, pages * page_size); -+ __ sd(zr, Address(t1)); -+ } ++ const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); ++ const int start_page = native_call ? n_shadow_pages : 1; ++ const int page_size = os::vm_page_size(); ++ for (int pages = start_page; pages <= n_shadow_pages ; pages++) { ++ __ sub(t0, sp, pages * page_size); ++ __ sd(zr, Address(t0)); + } +} + @@ -48319,7 +46018,7 @@ index 000000000..f5e212204 +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags -+ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // x11: Method* + // x30: sender sp @@ -48535,18 +46234,8 @@ index 000000000..f5e212204 + __ mv(t0, _thread_in_native_trans); + __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); + -+ if (os::is_MP()) { -+ if (UseMembar) { -+ // Force this write out before the read below -+ __ membar(MacroAssembler::AnyAny); -+ } else { -+ // Write serialization page so VM thread can do a pseudo remote membar. -+ // We use the current thread pointer to calculate a thread specific -+ // offset to write to within the page. This minimizes bus traffic -+ // due to cache line collision. -+ __ serialize_memory(xthread, t0, t1); -+ } -+ } ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); + + // check for safepoint operation in progress and/or pending suspend requests + { @@ -48592,7 +46281,7 @@ index 000000000..f5e212204 + // and result handler will pick it up + + { -+ Label no_oop, not_weak, store_result; ++ Label no_oop; + __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ bne(t, result_handler, no_oop); + // Unbox oop result, e.g. JNIHandles::resolve result. 
@@ -48607,14 +46296,14 @@ index 000000000..f5e212204 + { + Label no_reguard; + __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); -+ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ addi(t1, zr, (u1)JavaThread::stack_guard_yellow_reserved_disabled); + __ bne(t0, t1, no_reguard); + -+ __ push_call_clobbered_registers(); ++ __ pusha(); // only save smashed registers + __ mv(c_rarg0, xthread); + __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ jalr(t1); -+ __ pop_call_clobbered_registers(); ++ __ popa(); // only restore smashed registers + __ bind(no_reguard); + } + @@ -48850,7 +46539,7 @@ index 000000000..f5e212204 + __ set_method_data_pointer_for_bcp(); + // don't think we need this + __ get_method(x11); -+ __ jal(profile_method_continue); ++ __ j(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); @@ -49014,7 +46703,7 @@ index 000000000..f5e212204 + Label L_done; + + __ lbu(t0, Address(xbcp, 0)); -+ __ mv(t1, Bytecodes::_invokestatic); ++ __ li(t1, Bytecodes::_invokestatic); + __ bne(t1, t0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. @@ -49060,7 +46749,7 @@ index 000000000..f5e212204 + // ra: return address/pc that threw exception + // sp: expression stack of caller + // fp: fp of caller -+ // FIXME: There's no point saving RA here because VM calls don't trash it ++ // FIXME: There's no point saving ra here because VM calls don't trash it + __ sub(sp, sp, 2 * wordSize); + __ sd(x10, Address(sp, 0)); // save exception + __ sd(ra, Address(sp, wordSize)); // save return address @@ -49157,7 +46846,7 @@ index 000000000..f5e212204 + __ push_reg(t0); + __ push_reg(x10); + __ mv(x10, (address) &BytecodeCounter::_counter_value); -+ __ mv(t0, 1); ++ __ li(t0, 1); + __ amoadd_d(zr, x10, t0, Assembler::aqrl); + __ pop_reg(x10); + __ pop_reg(t0); @@ -49192,14 +46881,14 @@ index 000000000..f5e212204 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp new file mode 100644 -index 000000000..8e6e7dee5 +index 0000000000..84b1afc7dc --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp -@@ -0,0 +1,4028 @@ +@@ -0,0 +1,4006 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -49225,6 +46914,7 @@ index 000000000..8e6e7dee5 +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" @@ -49234,6 +46924,7 @@ index 000000000..8e6e7dee5 +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" @@ -49245,7 +46936,7 @@ index 000000000..8e6e7dee5 +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { -+ // No riscv specific initialization ++ // No RISC-V specific initialization +} + +// Address computation: local variables @@ -49270,12 +46961,15 @@ index 000000000..8e6e7dee5 + return iaddress(n); +} + -+static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, 0); +} + -+static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address laddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { ++ assert_cond(_masm != NULL); + _masm->shadd(temp, r, xlocals, temp, 3); + return Address(temp, Interpreter::local_offset_in_bytes(1));; +} @@ -49284,7 +46978,8 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + -+static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++static inline Address daddress(Register r, Register temp, ++ InterpreterMacroAssembler* _masm) { + return laddress(r, temp, _masm); +} + @@ -49292,6 +46987,10 @@ index 000000000..8e6e7dee5 + return iaddress(r, temp, _masm); +} + ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} ++ +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. 
+static inline Address at_tos () { @@ -49326,13 +47025,15 @@ index 000000000..8e6e7dee5 + Register val, + DecoratorSet decorators) { + assert(val == noreg || val == x10, "parameter is just for looks"); -+ __ store_heap_oop(dst, val, x29, x11, x13, decorators); ++ assert_cond(_masm != NULL); ++ __ store_heap_oop(dst, val, x29, x11, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators) { ++ assert_cond(_masm != NULL); + __ load_heap_oop(dst, src, x7, x11, decorators); +} + @@ -49426,13 +47127,13 @@ index 000000000..8e6e7dee5 +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::lconst(int value) +{ + transition(vtos, ltos); -+ __ mv(x10, value); ++ __ li(x10, value); +} + +void TemplateTable::fconst(int value) @@ -49641,7 +47342,6 @@ index 000000000..8e6e7dee5 + __ bind(notLong); + condy_helper(Done); + __ bind(Done); -+ +} + +void TemplateTable::condy_helper(Label& Done) @@ -49937,8 +47637,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); + __ addw(x10, x10, zr); // signed extended +} + @@ -49951,8 +47651,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::faload() @@ -49964,8 +47664,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); -+ __ shadd(t0, x11, x10, t0, 2); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::daload() @@ -49977,8 +47677,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); -+ __ shadd(t0, x11, x10, t0, 3); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::aaload() @@ -49990,9 +47690,9 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); -+ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); ++ __ shadd(x10, x11, x10, t0, LogBytesPerHeapOop); + do_oop_load(_masm, -+ Address(t0), ++ Address(x10), + x10, + IS_ARRAY); +} @@ -50006,8 +47706,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); -+ __ shadd(t0, x11, x10, t0, 0); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, 
noreg); ++ __ shadd(x10, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::caload() @@ -50019,8 +47719,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +// iload followed by caload frequent pair @@ -50036,8 +47736,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::saload() @@ -50049,8 +47749,8 @@ index 000000000..8e6e7dee5 + // x11: index + index_check(x10, x11); // leaves index in x11, kills t0 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); -+ __ shadd(t0, x11, x10, t0, 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ shadd(x10, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(x10), noreg, noreg); +} + +void TemplateTable::iload(int n) @@ -50237,7 +47937,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::lastore() { @@ -50250,7 +47950,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::fastore() { @@ -50263,7 +47963,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); + __ shadd(t0, x11, x13, t0, 2); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg); +} + +void TemplateTable::dastore() { @@ -50276,7 +47976,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); + __ shadd(t0, x11, x13, t0, 3); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg); +} + +void TemplateTable::aastore() { @@ -50357,7 +48057,7 @@ index 000000000..8e6e7dee5 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); + + __ add(x11, x13, x11); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, 
noreg); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg); +} + +void TemplateTable::castore() @@ -50371,7 +48071,7 @@ index 000000000..8e6e7dee5 + index_check(x13, x11); // prefer index in x11 + __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); + __ shadd(t0, x11, x13, t0, 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg); +} + +void TemplateTable::sastore() @@ -50982,9 +48682,9 @@ index 000000000..8e6e7dee5 + } else { // not TieredCompilation + // increment counter + __ ld(t1, Address(xmethod, Method::method_counters_offset())); -+ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter + __ addw(t0, x10, InvocationCounter::count_increment); // increment counter -+ __ sw(t0, Address(t1, be_offset)); // store counter ++ __ sw(t0, Address(t1, be_offset)); // store counter + + __ lwu(x10, Address(t1, inv_offset)); // load invocation counter + __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits @@ -51025,7 +48725,6 @@ index 000000000..8e6e7dee5 + } + __ bind(dispatch); + } -+ + // Pre-load the next target bytecode into t0 + __ load_unsigned_byte(t0, Address(xbcp, 0)); + @@ -51092,7 +48791,6 @@ index 000000000..8e6e7dee5 + __ jr(t0); + } + } -+ +} + +void TemplateTable::if_0cmp(Condition cc) @@ -51188,9 +48886,9 @@ index 000000000..8e6e7dee5 + __ pop_ptr(x11); + + if (cc == equal) { -+ __ oop_nequal(x11, x10, not_taken); ++ __ bne(x11, x10, not_taken); + } else if (cc == not_equal) { -+ __ oop_equal(x11, x10, not_taken); ++ __ beq(x11, x10, not_taken); + } + branch(false, false); + __ bind(not_taken); @@ -51648,14 +49346,6 @@ index 000000000..8e6e7dee5 + pop_and_check_object(obj); + } + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + __ add(off, obj, off); + const Address field(off); + @@ -51918,7 +49608,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); // off register as temparator register. -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); + } @@ -51938,7 +49628,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); + } @@ -51979,7 +49669,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); + } @@ -51999,7 +49689,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); + } @@ -52019,7 +49709,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); + } @@ -52039,7 +49729,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); + } @@ -52059,7 +49749,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. + const Address field(off, 0); -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); + } @@ -52081,7 +49771,7 @@ index 000000000..8e6e7dee5 + } + __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
+ const Address field(off, 0); -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); + } @@ -52218,28 +49908,28 @@ index 000000000..8e6e7dee5 + do_oop_store(_masm, field, x10, IN_HEAP); + break; + case Bytecodes::_fast_lputfield: -+ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_iputfield: -+ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: -+ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: -+ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: -+ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: -+ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg); + break; + case Bytecodes::_fast_fputfield: -+ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); + break; + case Bytecodes::_fast_dputfield: -+ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52298,14 +49988,6 @@ index 000000000..8e6e7dee5 + __ add(x11, x10, x11); + const Address field(x11, 0); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: @@ -52357,16 +50039,6 @@ index 000000000..8e6e7dee5 + __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + -+ if (!UseBarriersForVolatile) { -+ Label notVolatile; -+ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + -+ ConstantPoolCacheEntry::flags_offset()))); -+ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); -+ __ beqz(t0, notVolatile); -+ __ membar(MacroAssembler::AnyAny); -+ __ bind(notVolatile); -+ } -+ + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi(xbcp, xbcp, 1); @@ -52383,8 +50055,8 @@ index 000000000..8e6e7dee5 + __ verify_oop(x10); + break; + case ftos: -+ __ add(t0, x10, x11); -+ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); ++ __ add(x10, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(x10), noreg, noreg); + break; + default: + ShouldNotReachHere(); @@ -52406,11 +50078,6 @@ index 000000000..8e6e7dee5 +//----------------------------------------------------------------------------- +// Calls + -+void 
TemplateTable::count_calls(Register method, Register temp) -+{ -+ __ call_Unimplemented(); -+} -+ +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. @@ -52432,8 +50099,8 @@ index 000000000..8e6e7dee5 + assert(recv == noreg || recv == x12, ""); + + // setup registers & access constant pool cache -+ if (recv == noreg) { -+ recv = x12; ++ if (recv == noreg) { ++ recv = x12; + } + if (flags == noreg) { + flags = x13; @@ -52495,7 +50162,7 @@ index 000000000..8e6e7dee5 + __ beqz(t0, notFinal); + + const Register method = index; // method must be xmethod -+ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ assert(method == xmethod, "Method must be xmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* @@ -52518,7 +50185,7 @@ index 000000000..8e6e7dee5 + // profile this call + __ profile_virtual_call(x10, xlocals, x13); + -+ // get target methodOop & entry point ++ // get target Method & entry point + __ lookup_virtual_method(x10, index, method); + __ profile_arguments_type(x13, method, x14, true); + __ jump_from_interpreted(method); @@ -52654,7 +50321,7 @@ index 000000000..8e6e7dee5 + xmethod, x30, + no_such_interface); + -+ // xmethod: methodOop to call ++ // xmethod: Method to call + // x12: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error @@ -52666,7 +50333,7 @@ index 000000000..8e6e7dee5 + + // do the call + // x12: receiver -+ // xmethod,: methodOop ++ // xmethod: Method + __ jump_from_interpreted(xmethod); + __ should_not_reach_here(); + @@ -52828,7 +50495,7 @@ index 000000000..8e6e7dee5 + __ bnez(x13, loop); + } + -+ // initialize object hader only. ++ // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(t0, Address(x14, Klass::prototype_header_offset())); @@ -52988,7 +50655,7 @@ index 000000000..8e6e7dee5 + __ j(done); + // Come here on success + __ bind(ok_is_subtype); -+ __ mv(x10, 1); ++ __ li(x10, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { @@ -53226,12 +50893,12 @@ index 000000000..8e6e7dee5 +} diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp new file mode 100644 -index 000000000..b437c8f4c +index 0000000000..fcc86108d2 --- /dev/null +++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp @@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -53274,13 +50941,12 @@ index 000000000..b437c8f4c +#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp new file mode 100644 -index 000000000..03079aec0 +index 0000000000..6c89133de0 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp -@@ -0,0 +1,43 @@ +@@ -0,0 +1,42 @@ +/* -+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
++ * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53323,10 +50989,10 @@ index 000000000..03079aec0 +#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp new file mode 100644 -index 000000000..dd4f5c9ae +index 0000000000..6bdce51506 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp -@@ -0,0 +1,91 @@ +@@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. @@ -53352,6 +51018,7 @@ index 000000000..dd4f5c9ae + * + */ + ++#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" @@ -53371,11 +51038,6 @@ index 000000000..dd4f5c9ae + return; + } + -+ int core_id = -1; -+ int chip_id = -1; -+ int len = 0; -+ char* src_string = NULL; -+ + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; @@ -53420,13 +51082,13 @@ index 000000000..dd4f5c9ae +} diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp new file mode 100644 -index 000000000..0982b6668 +index 0000000000..711e4aeaf6 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -53481,12 +51143,12 @@ index 000000000..0982b6668 +#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp new file mode 100644 -index 000000000..31d5bb5f4 +index 0000000000..0e8f526bd9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp -@@ -0,0 +1,190 @@ +@@ -0,0 +1,209 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -53512,21 +51174,31 @@ index 000000000..31d5bb5f4 + +#include "precompiled.hpp" +#include "runtime/java.hpp" ++#include "runtime/os.hpp" +#include "runtime/vm_version.hpp" -+#include "utilities/macros.hpp" +#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" + +#include OS_HEADER_INLINE(os) + +const char* VM_Version::_uarch = ""; ++const char* VM_Version::_vm_mode = ""; +uint32_t VM_Version::_initial_vector_length = 0; + +void VM_Version::initialize() { + get_os_cpu_info(); + ++ // check if satp.mode is supported, currently supports up to SV48(RV64) ++ if (get_satp_mode() > VM_SV48) { ++ vm_exit_during_initialization( ++ err_msg("Unsupported satp mode: %s. 
Only satp modes up to sv48 are supported for now.", ++ _vm_mode)); ++ } ++ + if (FLAG_IS_DEFAULT(UseFMA)) { + FLAG_SET_DEFAULT(UseFMA, true); + } ++ + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); + } @@ -53568,12 +51240,12 @@ index 000000000..31d5bb5f4 + } + + if (UseCRC32Intrinsics) { -+ warning("CRC32Intrinsics instructions are not available on this CPU."); ++ warning("CRC32 intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + + if (UseCRC32CIntrinsics) { -+ warning("CRC32CIntrinsics instructions are not available on this CPU."); ++ warning("CRC32C intrinsics are not available on this CPU."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + @@ -53587,6 +51259,11 @@ index 000000000..31d5bb5f4 + } + } + ++ if (UseRVC && !(_features & CPU_C)) { ++ warning("RVC is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVC, false); ++ } ++ + if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { + FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); + } @@ -53610,16 +51287,20 @@ index 000000000..31d5bb5f4 + _features_string = os::strdup(buf); + +#ifdef COMPILER2 -+ initialize_c2(); ++ c2_initialize(); +#endif // COMPILER2 ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++ ++ FLAG_SET_DEFAULT(UseMembar, true); +} + +#ifdef COMPILER2 -+void VM_Version::initialize_c2() { -+ // lack of cmove in riscv ++void VM_Version::c2_initialize() { + if (UseCMoveUnconditionally) { + FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); + } ++ + if (ConditionalMoveLimit > 0) { + FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); + } @@ -53658,14 +51339,6 @@ index 000000000..31d5bb5f4 + FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); + } + -+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); -+ } -+ -+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { -+ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); -+ } -+ + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } @@ -53673,16 +51346,25 @@ index 000000000..31d5bb5f4 + if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); + } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ } +} +#endif // COMPILER2 diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp new file mode 100644 -index 000000000..0178e6d75 +index 0000000000..875511f522 --- /dev/null +++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp -@@ -0,0 +1,65 @@ +@@ -0,0 +1,80 @@ +/* -+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -53710,10 +51392,33 @@ index 000000000..0178e6d75 +#define CPU_RISCV_VM_VERSION_RISCV_HPP + +#include "runtime/abstract_vm_version.hpp" ++#include "runtime/arguments.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version : public Abstract_VM_Version { ++#ifdef COMPILER2 ++private: ++ static void c2_initialize(); ++#endif // COMPILER2 ++ ++// VM modes (satp.mode) privileged ISA 1.10 ++enum VM_MODE { ++ VM_MBARE = 0, ++ VM_SV39 = 8, ++ VM_SV48 = 9, ++ VM_SV57 = 10, ++ VM_SV64 = 11 ++}; ++ ++protected: ++ static const char* _uarch; ++ static const char* _vm_mode; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); ++ static VM_MODE get_satp_mode(); ++ +public: + // Initialization + static void initialize(); @@ -53733,29 +51438,19 @@ index 000000000..0178e6d75 +#undef DECLARE_CPU_FEATURE_FLAG + }; + -+protected: -+ static const char* _uarch; -+ static uint32_t _initial_vector_length; -+ static void get_os_cpu_info(); -+ static uint32_t get_current_vector_length(); -+ -+#ifdef COMPILER2 -+private: -+ static void initialize_c2(); -+#endif // COMPILER2 ++ static void initialize_cpu_information(void); +}; + +#endif // CPU_RISCV_VM_VERSION_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp new file mode 100644 -index 000000000..6572d9334 +index 0000000000..c4338715f9 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53783,10 +51478,10 @@ index 000000000..6572d9334 +#include "code/vmreg.hpp" + +void VMRegImpl::set_regName() { -+ Register reg = ::as_Register(0); + int i = 0; ++ Register reg = ::as_Register(0); + for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { -+ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { + regName[i++] = reg->name(); + } + reg = reg->successor(); @@ -53794,34 +51489,25 @@ index 000000000..6572d9334 + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { + regName[i++] = freg->name(); + } + freg = freg->successor(); + } + -+ VectorRegister vreg = ::as_VectorRegister(0); -+ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { -+ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { -+ regName[i++] = vreg->name(); -+ } -+ vreg = vreg->successor(); -+ } -+ -+ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { -+ regName[i] = "NON-GPR-FPR-VPR"; ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i++) { ++ regName[i] = "NON-GPR-FPR"; + } +} diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp new file mode 100644 -index 000000000..ec76a1db1 +index 0000000000..6f613a8f11 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp -@@ -0,0 +1,64 @@ +@@ -0,0 +1,53 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -53855,27 +51541,17 @@ index 000000000..ec76a1db1 + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + -+inline bool is_VectorRegister() { -+ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; -+} -+ +inline Register as_Register() { -+ assert( is_Register(), "must be"); ++ assert(is_Register(), "must be"); + return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ assert(is_FloatRegister() && is_even(value()), "must be"); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / + FloatRegisterImpl::max_slots_per_register); +} + -+inline VectorRegister as_VectorRegister() { -+ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); -+ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / -+ VectorRegisterImpl::max_slots_per_register); -+} -+ +inline bool is_concrete() { + assert(is_reg(), "must be"); + return is_even(value()); @@ -53884,14 +51560,13 @@ index 000000000..ec76a1db1 +#endif // CPU_RISCV_VMREG_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp new file mode 100644 -index 000000000..9605e59f4 +index 0000000000..06b70020b4 --- /dev/null +++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp -@@ -0,0 +1,47 @@ +@@ -0,0 +1,46 @@ +/* -+ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -53917,19 +51592,19 @@ index 000000000..9605e59f4 +#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP + -+inline VMReg RegisterImpl::as_VMReg() { -+ if( this == noreg ) { ++inline VMReg RegisterImpl::as_VMReg() const { ++ if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + -+inline VMReg FloatRegisterImpl::as_VMReg() { ++inline VMReg FloatRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_gpr); +} + -+inline VMReg VectorRegisterImpl::as_VMReg() { ++inline VMReg VectorRegisterImpl::as_VMReg() const { + return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + + ConcreteRegisterImpl::max_fpr); +} @@ -53937,12 +51612,12 @@ index 000000000..9605e59f4 +#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp new file mode 100644 -index 000000000..b2aa87ab8 +index 0000000000..0d205240a5 --- /dev/null +++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp @@ -0,0 +1,260 @@ +/* -+ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -54015,7 +51690,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(t2)); ++ __ add_memory_int64(Address(t2), 1); + } +#endif + @@ -54106,7 +51781,7 @@ index 000000000..b2aa87ab8 +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); -+ __ increment(Address(x18)); ++ __ add_memory_int64(Address(x18), 1); + } +#endif + @@ -54114,7 +51789,7 @@ index 000000000..b2aa87ab8 + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // Entry arguments: -+ // t2: CompiledICHolder ++ // t1: CompiledICHolder + // j_rarg0: Receiver + + // This stub is called from compiled code which has no callee-saved registers, @@ -54197,380 +51872,24 @@ index 000000000..b2aa87ab8 +} + +int VtableStub::pd_code_alignment() { -+ // riscv cache line size is 64 bytes, but we want to limit alignment loss. ++ // RISCV cache line size is not an architected constant. We just align on word size. + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209..3b836fe6b 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op - } - - // result = condition ? opr1 : opr2 --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); -+ - Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; - switch (condition) { - case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; -diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index e335f473d..53ad912cb 100644 ---- a/src/hotspot/cpu/s390/s390.ad -+++ b/src/hotspot/cpu/s390/s390.ad -@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { - // BUT: make sure match rule is not disabled by a false predicate! - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. -- bool ret_value = match_rule_supported(opcode); -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } - // Add rules here. - -- return ret_value; // Per default match rules are supported. -+ return true; // Per default match rules are supported. 
- } - - int Matcher::regnum_to_fpu_offset(int regnum) { -@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { - return Node::NotAMachineReg; - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // z/Architecture does support misaligned store/load at minimal extra cost. - const bool Matcher::misaligned_vectors_ok() { - return true; -diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index 7a2798a51..7d9b17b44 100644 ---- a/src/hotspot/cpu/sparc/sparc.ad -+++ b/src/hotspot/cpu/sparc/sparc.ad -@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for -@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { - return max_vector_size(bt); // Same as max. - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ - // SPARC doesn't support misaligned vectors store/load. - const bool Matcher::misaligned_vectors_ok() { - return false; -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4..d38c63600 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - } - } - --void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { -+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, -+ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { -+ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); -+ - Assembler::Condition acond, ncond; - switch (condition) { - case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -index 82fd8522b..8016d328a 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp -@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register - bind(DONE_LABEL); - } // string_indexof_char - -+void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { -+ ShortBranchVerifier sbv(this); -+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); -+ -+ int stride = 16; -+ -+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, -+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, -+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, -+ FOUND_SEQ_CHAR, DONE_LABEL; -+ -+ movptr(result, str1); -+ if (UseAVX >= 2) { -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); -+ cmpl(cnt1, stride*2); -+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); -+ vpxor(vec2, vec2); -+ movl(tmp, cnt1); -+ 
andl(tmp, 0xFFFFFFE0); //vector count (in chars) -+ andl(cnt1,0x0000001F); //tail count (in chars) -+ -+ bind(SCAN_TO_32_CHAR_LOOP); -+ vmovdqu(vec3, Address(result, 0)); -+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); -+ vptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 32); -+ subl(tmp, stride*2); -+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); -+ jmp(SCAN_TO_16_CHAR); -+ -+ bind(SCAN_TO_16_CHAR_INIT); -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ -+ bind(SCAN_TO_16_CHAR); -+ cmpl(cnt1, stride); -+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left -+ if (UseAVX < 2) { -+ movdl(vec1, ch); -+ pxor(vec2, vec2); -+ pshufb(vec1, vec2); -+ } -+ movl(tmp, cnt1); -+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) -+ andl(cnt1,0x0000000F); //tail count (in bytes) -+ -+ bind(SCAN_TO_16_CHAR_LOOP); -+ movdqu(vec3, Address(result, 0)); -+ pcmpeqb(vec3, vec1); -+ ptest(vec2, vec3); -+ jcc(Assembler::carryClear, FOUND_CHAR); -+ addptr(result, 16); -+ subl(tmp, stride); -+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... -+ -+ bind(SCAN_TO_CHAR_INIT); -+ testl(cnt1, cnt1); -+ jcc(Assembler::zero, RET_NOT_FOUND); -+ bind(SCAN_TO_CHAR_LOOP); -+ load_unsigned_byte(tmp, Address(result, 0)); -+ cmpl(ch, tmp); -+ jccb(Assembler::equal, FOUND_SEQ_CHAR); -+ addptr(result, 1); -+ subl(cnt1, 1); -+ jccb(Assembler::zero, RET_NOT_FOUND); -+ jmp(SCAN_TO_CHAR_LOOP); -+ -+ bind(RET_NOT_FOUND); -+ movl(result, -1); -+ jmpb(DONE_LABEL); -+ -+ bind(FOUND_CHAR); -+ if (UseAVX >= 2) { -+ vpmovmskb(tmp, vec3); -+ } else { -+ pmovmskb(tmp, vec3); -+ } -+ bsfl(ch, tmp); -+ addptr(result, ch); -+ -+ bind(FOUND_SEQ_CHAR); -+ subptr(result, str1); -+ -+ bind(DONE_LABEL); -+} // stringL_indexof_char -+ - // helper function for string_compare - void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, - Address::ScaleFactor scale, Address::ScaleFactor scale1, -diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -index 1bed0cce9..47a062c11 100644 ---- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp -@@ -1659,6 +1659,8 @@ public: - #ifdef COMPILER2 - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, - XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); -+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, -+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. -diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index baa7cc774..238d8729b 100644 ---- a/src/hotspot/cpu/x86/x86.ad -+++ b/src/hotspot/cpu/x86/x86.ad -@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { - return ret_value; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -+const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); -+ if (!vector_size_supported(bt, vlen)) { -+ ret_value = false; -+ } - if (ret_value) { - switch (opcode) { - case Op_AbsVB: -@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { - return MIN2(size,max_size); - } - -+const bool Matcher::supports_scalable_vector() { -+ return false; -+} -+ -+const int Matcher::scalable_vector_reg_size(const BasicType bt) { -+ return -1; -+} -+ -+ - // Vector ideal reg corresponding to specified size in bytes - const uint Matcher::vector_ideal_reg(int size) { - assert(MaxVectorSize >= size, ""); -diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad -index bc9947327..bbe49bd62 100644 ---- a/src/hotspot/cpu/x86/x86_32.ad -+++ b/src/hotspot/cpu/x86/x86_32.ad -@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, -+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast array equals - instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index 7e6739ffe..53f887ea6 100644 ---- a/src/hotspot/cpu/x86/x86_64.ad -+++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -2975,7 +2975,7 @@ frame - RAX_H_num // Op_RegL - }; - // Excluded flags and vector registers. 
-- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); -+ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); - return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); - %} - %} -@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI - ins_pipe( pipe_slow ); - %} - --instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) -+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) - %{ -- predicate(UseSSE42Intrinsics); -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); -@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, - ins_pipe( pipe_slow ); - %} - -+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, -+ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) -+%{ -+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); -+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); -+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); -+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} -+ ins_encode %{ -+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, -+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); -+ %} -+ ins_pipe( pipe_slow ); -+%} -+ - // fast string equals - instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, - legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e..6c79d20a4 100644 +index 2842a11f92..208a374eea 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp -@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { - {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, - {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, - {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, -- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, -+#ifdef _LP64 -+ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, -+#else -+ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, -+#endif - {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, - }; - -@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { +@@ -2829,6 +2829,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { strncpy(cpuinfo, "IA64", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); +#elif defined(RISCV) -+ 
strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); ++ strncpy(cpuinfo, "RISCV64", length); #elif defined(S390) strncpy(cpuinfo, "S390", length); #elif defined(SPARC) -@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,7 +4062,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -54580,14 +51899,46 @@ index 74945999e..6c79d20a4 100644 #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); +diff --git a/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +new file mode 100644 +index 0000000000..f2610af6cd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/assembler_linux_riscv.cpp +@@ -0,0 +1,26 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// nothing required here diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp new file mode 100644 -index 000000000..961fff011 +index 0000000000..4a1ebee8b0 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp -@@ -0,0 +1,113 @@ +@@ -0,0 +1,189 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54614,46 +51965,121 @@ index 000000000..961fff011 +#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP +#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class atomic ++ +// Note that memory_order_conservative requires a full barrier after atomic stores. 
+// See https://patchwork.kernel.org/patch/3575821/ + ++#if defined(__clang_major__) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2)) ++#define FULL_COMPILER_ATOMIC_SUPPORT ++#endif ++ +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE); +#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE); + +template +struct Atomic::PlatformAdd -+ : public Atomic::AddAndFetch > ++ : Atomic::FetchAndAdd > +{ + template + D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add add and fetch for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; + } ++ ++ template ++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const { ++ return add_and_fetch(add_value, dest, order) - add_value; ++ } +}; + ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest __attribute__((unused)), ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3))); ++ int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24 ++ ++ uint64_t mask = 0xfful << shift; // 0x00000000..FF.. ++ uint64_t remask = ~mask; // 0xFFFFFFFF..00.. ++ ++ uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC.. ++ uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE.. ++ ++ uint64_t old_value; ++ uint64_t rc_temp; ++ ++ __asm__ __volatile__ ( ++ "1: lr.w %0, %2 \n\t" ++ " and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX.. ++ " bne %1, %3, 2f \n\t" // compare 64-bit w_cv ++ " and %1, %0, %6 \n\t" // remove old byte ++ " or %1, %1, %4 \n\t" // add new byte ++ " sc.w %1, %1, %2 \n\t" // store new word ++ " bnez %1, 1b \n\t" ++ "2: \n\t" ++ : /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst) ++ : /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask) ++ : "memory" ); ++ ++ if (order != memory_order_relaxed) { ++ FULL_MEM_BARRIER; ++ } ++ ++ return (T)((old_value & mask) >> shift); ++} ++#endif ++ +template +template +inline T Atomic::PlatformXchg::operator()(T exchange_value, + T volatile* dest, + atomic_memory_order order) const { ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ // If we add xchg for sub word and are using older compiler ++ // it must be added here due to not using lib atomic. ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE); + FULL_MEM_BARRIER; + return res; +} + -+// No direct support for cmpxchg of bytes; emulate using int. ++// __attribute__((unused)) on dest is to get rid of spurious GCC warnings. 
+template +template +inline T Atomic::PlatformCmpxchg::operator()(T exchange_value, -+ T volatile* dest, ++ T volatile* dest __attribute__((unused)), + T compare_value, + atomic_memory_order order) const { ++ ++#ifndef FULL_COMPILER_ATOMIC_SUPPORT ++ STATIC_ASSERT(byte_size >= 4); ++#endif ++ + STATIC_ASSERT(byte_size == sizeof(T)); + T value = compare_value; + if (order != memory_order_relaxed) { @@ -54672,7 +52098,7 @@ index 000000000..961fff011 +template<> +template +inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, -+ T volatile* dest, ++ T volatile* dest __attribute__((unused)), + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); @@ -54698,15 +52124,16 @@ index 000000000..961fff011 + return rv; +} + ++#undef FULL_COMPILER_ATOMIC_SUPPORT +#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP -diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp +diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp new file mode 100644 -index 000000000..44f04d1a9 +index 0000000000..28868c7640 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp -@@ -0,0 +1,44 @@ ++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.hpp +@@ -0,0 +1,45 @@ +/* -+ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54727,20 +52154,21 @@ index 000000000..44f04d1a9 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. ++ * + */ + -+#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP -+#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. -+inline u2 Bytes::swap_u2(u2 x) { ++inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + -+inline u4 Bytes::swap_u4(u4 x) { ++inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + @@ -54748,16 +52176,16 @@ index 000000000..44f04d1a9 + return bswap_64(x); +} + -+#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp new file mode 100644 -index 000000000..645b40a7c +index 0000000000..bdf36d6b4c --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp -@@ -0,0 +1,116 @@ +@@ -0,0 +1,124 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -54787,7 +52215,7 @@ index 000000000..645b40a7c + (void)memmove(to, from, count * HeapWordSize); +} + -+static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; // fall through + case 7: to[6] = from[6]; // fall through @@ -54799,20 +52227,28 @@ index 000000000..645b40a7c + case 1: to[0] = from[0]; // fall through + case 0: break; + default: -+ if(is_atomic) { -+ while (count-- > 0) { *to++ = *from++; } -+ } else { -+ memcpy(to, from, count * HeapWordSize); -+ } ++ memcpy(to, from, count * HeapWordSize); ++ break; + } +} + -+static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, false); -+} -+ +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -+ pd_disjoint_words_helper(from, to, count, true); ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { @@ -54873,12 +52309,12 @@ index 000000000..645b40a7c +#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp new file mode 100644 -index 000000000..041cdf4ff +index 0000000000..297414bfcd --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp @@ -0,0 +1,43 @@ +/* -+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54922,12 +52358,12 @@ index 000000000..041cdf4ff +#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp new file mode 100644 -index 000000000..842aa51e0 +index 0000000000..5b5d35553f --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp -@@ -0,0 +1,73 @@ +@@ -0,0 +1,74 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -54956,7 +52392,7 @@ index 000000000..842aa51e0 + +// Included in orderAccess.hpp header file. + -+#include "vm_version_riscv.hpp" ++#include "runtime/vm_version.hpp" + +// Implementation of class OrderAccess. 
+ @@ -54977,37 +52413,38 @@ index 000000000..842aa51e0 + FULL_MEM_BARRIER; +} + ++ +template +struct OrderAccess::PlatformOrderedLoad +{ + template -+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } ++ T operator()(const volatile T* p) const { T data; __atomic_load(const_cast(p), &data, __ATOMIC_ACQUIRE); return data; } +}; + +template +struct OrderAccess::PlatformOrderedStore +{ + template -+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } ++ void operator()(T v, volatile T* p) const { __atomic_store(const_cast(p), &v, __ATOMIC_RELEASE); } +}; + +template +struct OrderAccess::PlatformOrderedStore +{ + template -+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } ++ void operator()(T v, volatile T* p) const { release_store(p, v); OrderAccess::fence(); } +}; + +#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp new file mode 100644 -index 000000000..37947701b +index 0000000000..8b772892b4 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp -@@ -0,0 +1,628 @@ +@@ -0,0 +1,624 @@ +/* -+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55032,8 +52469,6 @@ index 000000000..37947701b + +// no precompiled headers +#include "asm/macroAssembler.hpp" -+#include "classfile/classLoader.hpp" -+#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/codeCache.hpp" +#include "code/icBuffer.hpp" @@ -55053,6 +52488,7 @@ index 000000000..37947701b +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" ++#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" @@ -55063,6 +52499,7 @@ index 000000000..37947701b + +// put OS-includes here +# include ++# include +# include +# include +# include @@ -55115,9 +52552,7 @@ index 000000000..37947701b +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). +ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, -+ const ucontext_t* uc, -+ intptr_t** ret_sp, -+ intptr_t** ret_fp) { ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); @@ -55127,9 +52562,9 @@ index 000000000..37947701b +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, -+ intptr_t** ret_sp, intptr_t** ret_fp) { ++ intptr_t** ret_sp, intptr_t** ret_fp) { + -+ ExtendedPC epc; ++ ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { @@ -55185,11 +52620,11 @@ index 000000000..37947701b + // In compiled code, the stack banging is performed before RA + // has been saved in the frame. RA is live, and SP and FP + // belong to the caller. 
-+ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); -+ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); -+ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - -+ NativeInstruction::instruction_size); -+ *fr = frame(frame_sp, frame_fp, frame_pc); ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[REG_LR] ++ - NativeInstruction::instruction_size); ++ *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + assert(!fr->is_first_frame(), "Safety check"); @@ -55209,7 +52644,7 @@ index 000000000..37947701b + +NOINLINE frame os::current_frame() { + intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); -+ if(sender_sp != NULL) { ++ if (sender_sp != NULL) { + frame myframe((intptr_t*)os::current_stack_pointer(), + sender_sp[frame::link_offset], + CAST_FROM_FN_PTR(address, os::current_frame)); @@ -55269,19 +52704,20 @@ index 000000000..37947701b + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { -+ if (t != NULL ) { ++ if (t != NULL ){ + if(t->is_Java_thread()) { -+ thread = (JavaThread*)t; -+ } else if(t->is_VM_thread()) { ++ thread = (JavaThread *) t; ++ } ++ else if(t->is_VM_thread()){ + vmthread = (VMThread *)t; + } + } + } + + // Handle SafeFetch faults -+ if (uc != NULL) { ++ if ((sig == SIGSEGV || sig == SIGBUS) && uc != NULL) { + address const pc = (address) os::Linux::ucontext_get_pc(uc); -+ if (StubRoutines::is_safefetch_fault(pc)) { ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } @@ -55290,7 +52726,7 @@ index 000000000..37947701b + // decide if this trap can be handled by a stub + address stub = NULL; + -+ address pc = NULL; ++ address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { @@ -55385,7 +52821,7 @@ index 000000000..37947701b + stub = SharedRuntime::handle_unsafe_access(thread, next_pc); + } + } else if (sig == SIGFPE && -+ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, @@ -55393,7 +52829,7 @@ index 000000000..37947701b + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && -+ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } @@ -55407,9 +52843,9 @@ index 000000000..37947701b + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. 
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { -+ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); -+ if (addr_slow != (address)-1) { -+ stub = addr_slow; ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; + } + } + @@ -55427,9 +52863,7 @@ index 000000000..37947701b + + if (stub != NULL) { + // save all thread context in case we need to restore it -+ if (thread != NULL) { -+ thread->set_saved_exception_pc(pc); -+ } ++ if (thread != NULL) thread->set_saved_exception_pc(pc); + + os::Linux::ucontext_set_pc(uc, stub); + return true; @@ -55471,7 +52905,6 @@ index 000000000..37947701b +void os::Linux::set_fpu_control_word(int fpu_control) { +} + -+ +//////////////////////////////////////////////////////////////////////////////// +// thread stack + @@ -55635,13 +53068,13 @@ index 000000000..37947701b +}; diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp new file mode 100644 -index 000000000..eae1635b0 +index 0000000000..f3e3a73bc5 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp @@ -0,0 +1,40 @@ +/* -+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -55681,12 +53114,12 @@ index 000000000..eae1635b0 +#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp new file mode 100644 -index 000000000..82b9bb6fd +index 0000000000..2bd48e09c3 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp @@ -0,0 +1,38 @@ +/* -+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55723,14 +53156,142 @@ index 000000000..82b9bb6fd +} + +#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +new file mode 100644 +index 0000000000..ffcd819487 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.cpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "logging/log.hpp" ++#include "riscv_flush_icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/debug.hpp" ++ ++#include ++#include ++ ++#define check_with_errno(check_type, cond, msg) \ ++ do { \ ++ int err = errno; \ ++ check_type(cond, "%s; error='%s' (errno=%s)", msg, os::strerror(err), \ ++ os::errno_name(err)); \ ++} while (false) ++ ++#define assert_with_errno(cond, msg) check_with_errno(assert, cond, msg) ++#define guarantee_with_errno(cond, msg) check_with_errno(guarantee, cond, msg) ++ ++#ifndef NR_riscv_flush_icache ++#ifndef NR_arch_specific_syscall ++#define NR_arch_specific_syscall 244 ++#endif ++#define NR_riscv_flush_icache (NR_arch_specific_syscall + 15) ++#endif ++ ++#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL ++#define SYS_RISCV_FLUSH_ICACHE_ALL 0UL ++ ++static long sys_flush_icache(uintptr_t start, uintptr_t end , uintptr_t flags) { ++ return syscall(NR_riscv_flush_icache, start, end, flags); ++} ++ ++bool RiscvFlushIcache::test() { ++ ATTRIBUTE_ALIGNED(64) char memory[64]; ++ long ret = sys_flush_icache((uintptr_t)&memory[0], ++ (uintptr_t)&memory[sizeof(memory) - 1], ++ SYS_RISCV_FLUSH_ICACHE_ALL); ++ if (ret == 0) { ++ return true; ++ } ++ int err = errno; \ ++ log_error(os)("Syscall: RISCV_FLUSH_ICACHE not available; error='%s' (errno=%s)", ++ os::strerror(err), os::errno_name(err)); ++ return false; ++} ++ ++void RiscvFlushIcache::flush(uintptr_t start, uintptr_t end) { ++ long ret = sys_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_ALL); ++ guarantee_with_errno(ret == 0, "riscv_flush_icache failed"); ++} +diff --git a/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +new file mode 100644 +index 0000000000..f4e7263b39 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/riscv_flush_icache.hpp +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2023, Rivos Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++#define OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP ++ ++#include "memory/allocation.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/growableArray.hpp" ++ ++class RiscvFlushIcache: public AllStatic { ++ public: ++ static bool test(); ++ static void flush(uintptr_t start, uintptr_t end); ++}; ++ ++#endif // OS_LINUX_RISCV_FLUSH_ICACHE_LINUX_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp new file mode 100644 -index 000000000..c78096931 +index 0000000000..ccceed643e --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp -@@ -0,0 +1,103 @@ +@@ -0,0 +1,100 @@ +/* -+ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55778,13 +53339,10 @@ index 000000000..c78096931 +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { -+ assert(this->is_Java_thread(), "must be JavaThread"); -+ JavaThread* jt = (JavaThread *)this; -+ + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. -+ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { -+ *fr_addr = jt->pd_last_frame(); ++ if (has_last_Java_frame() && frame_anchor()->walkable()) { ++ *fr_addr = pd_last_frame(); + return true; + } + @@ -55810,10 +53368,10 @@ index 000000000..c78096931 + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); -+ if (!ret_frame.safe_for_sender(jt)) { ++ if (!ret_frame.safe_for_sender(this)) { +#ifdef COMPILER2 + frame ret_frame2(ret_sp, NULL, addr.pc()); -+ if (!ret_frame2.safe_for_sender(jt)) { ++ if (!ret_frame2.safe_for_sender(this)) { + // nothing else to try if the frame isn't good + return false; + } @@ -55834,12 +53392,12 @@ index 000000000..c78096931 +void JavaThread::cache_global_variables() { } diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp new file mode 100644 -index 000000000..657b98984 +index 0000000000..4b91fa855a --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp @@ -0,0 +1,67 @@ +/* -+ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55876,7 +53434,7 @@ index 000000000..657b98984 + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } -+ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } @@ -55897,7 +53455,7 @@ index 000000000..657b98984 + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); -+ ++public: + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). 
+ static bool register_stack_overflow() { return false; } @@ -55907,12 +53465,12 @@ index 000000000..657b98984 +#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp new file mode 100644 -index 000000000..8ee443b5d +index 0000000000..6cf7683a58 --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp @@ -0,0 +1,55 @@ +/* -+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -55968,13 +53526,13 @@ index 000000000..8ee443b5d +#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp new file mode 100644 -index 000000000..ef9358aa0 +index 0000000000..8bcc949fed --- /dev/null +++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp -@@ -0,0 +1,116 @@ +@@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56049,6 +53607,20 @@ index 000000000..ef9358aa0 + return (uint32_t)read_csr(CSR_VLENB); +} + ++VM_Version::VM_MODE VM_Version::get_satp_mode() { ++ if (!strcmp(_vm_mode, "sv39")) { ++ return VM_SV39; ++ } else if (!strcmp(_vm_mode, "sv48")) { ++ return VM_SV48; ++ } else if (!strcmp(_vm_mode, "sv57")) { ++ return VM_SV57; ++ } else if (!strcmp(_vm_mode, "sv64")) { ++ return VM_SV64; ++ } else { ++ return VM_MBARE; ++ } ++} ++ +void VM_Version::get_os_cpu_info() { + + uint64_t auxv = getauxval(AT_HWCAP); @@ -56061,11 +53633,30 @@ index 000000000..ef9358aa0 + STATIC_ASSERT(CPU_C == HWCAP_ISA_C); + STATIC_ASSERT(CPU_V == HWCAP_ISA_V); + ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V); ++ + if (FILE *f = fopen("/proc/cpuinfo", "r")) { + char buf[512], *p; + while (fgets(buf, sizeof (buf), f) != NULL) { + if ((p = strchr(buf, ':')) != NULL) { -+ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ if (strncmp(buf, "mmu", sizeof "mmu" - 1) == 0) { ++ if (_vm_mode[0] != '\0') { ++ continue; ++ } ++ char* vm_mode = os::strdup(p + 2); ++ vm_mode[strcspn(vm_mode, "\n")] = '\0'; ++ _vm_mode = vm_mode; ++ } else if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { + char* uarch = os::strdup(p + 2); + uarch[strcspn(uarch, "\n")] = '\0'; + _uarch = uarch; @@ -56075,105 +53666,71 @@ index 000000000..ef9358aa0 + } + fclose(f); + } -+ -+ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. -+ // Availability for those extensions could not be queried from HWCAP. -+ // TODO: Add proper detection for those extensions. 
-+ _features = auxv & ( -+ HWCAP_ISA_I | -+ HWCAP_ISA_M | -+ HWCAP_ISA_A | -+ HWCAP_ISA_F | -+ HWCAP_ISA_D | -+ HWCAP_ISA_C | -+ HWCAP_ISA_V); +} -diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp -index ba61aa4c0..4ca0b050b 100644 ---- a/src/hotspot/share/adlc/archDesc.cpp -+++ b/src/hotspot/share/adlc/archDesc.cpp -@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - // Match Vector types. - if (strncmp(idealOp, "Vec",3)==0) { - switch(last_char) { -+ case 'A': return "TypeVect::VECTA"; - case 'S': return "TypeVect::VECTS"; - case 'D': return "TypeVect::VECTD"; - case 'X': return "TypeVect::VECTX"; -@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { - } - } - -+ if (strncmp(idealOp, "RegVMask", 8) == 0) { -+ return "Type::BOTTOM"; -+ } -+ - // !!!!! - switch(last_char) { - case 'I': return "TypeInt::INT"; -diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index f810fde76..2cf9636d1 100644 ---- a/src/hotspot/share/adlc/formssel.cpp -+++ b/src/hotspot/share/adlc/formssel.cpp -@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { - strcmp(opType,"RegL")==0 || - strcmp(opType,"RegF")==0 || - strcmp(opType,"RegD")==0 || -+ strcmp(opType,"RegVMask")==0 || -+ strcmp(opType,"VecA")==0 || - strcmp(opType,"VecS")==0 || - strcmp(opType,"VecD")==0 || - strcmp(opType,"VecX")==0 || diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73..af54dddf3 100644 +index e30d39f73d..c640c546b1 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp -@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { - void LIR_Op2::verify() const { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -200,6 +200,9 @@ void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { -- case lir_cmove: + case lir_cmove: ++#ifdef RISCV ++ assert(false, "lir_cmove is LIR_Op4 on RISCV"); ++#endif case lir_xchg: break; -@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { +@@ -252,9 +255,13 @@ void LIR_Op2::verify() const { LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) -- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(NULL) - , _stub(NULL) { +@@ -262,9 +269,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : -- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(stub->entry()) -+ , _type(type) , _block(NULL) , _ublock(NULL) - , _stub(stub) { +@@ -272,9 +283,13 @@ LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : } LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) -- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) -- , _cond(cond) -- , _type(type) -+ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) ++#ifdef RISCV ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL, type) ++#else + : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _cond(cond) + , _type(type) ++#endif , _label(block->label()) -+ , _type(type) , _block(block) , _ublock(ublock) - , _stub(NULL) -@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { +@@ -296,13 +311,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { } void LIR_OpBranch::negate_cond() { @@ -56194,61 +53751,66 @@ index e30d39f73..af54dddf3 100644 default: ShouldNotReachHere(); } } -@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -525,6 +540,15 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(op->as_OpBranch() != NULL, "must be"); LIR_OpBranch* opBranch = (LIR_OpBranch*)op; ++#ifdef RISCV + assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && + opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && + opBranch->_tmp5->is_illegal(), "not used"); + + if (opBranch->_opr1->is_valid()) do_input(opBranch->_opr1); + if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++#endif + if (opBranch->_info != NULL) do_info(opBranch->_info); assert(opBranch->_result->is_illegal(), "not used"); if (opBranch->_stub != NULL) opBranch->stub()->visit(this); -@@ -615,17 +618,19 
@@ void LIR_OpVisitState::visit(LIR_Op* op) { +@@ -615,6 +639,21 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // to the result operand, otherwise the backend fails case lir_cmove: { -- assert(op->as_Op2() != NULL, "must be"); -- LIR_Op2* op2 = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; - -- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && -- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); -- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ + assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && -+ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); - -- do_input(op2->_opr1); -- do_input(op2->_opr2); -- do_temp(op2->_opr2); -- do_output(op2->_result); ++ + do_input(op4->_opr1); + do_input(op4->_opr2); + if (op4->_opr3->is_valid()) do_input(op4->_opr3); + if (op4->_opr4->is_valid()) do_input(op4->_opr4); + do_temp(op4->_opr2); + do_output(op4->_result); ++#else + assert(op->as_Op2() != NULL, "must be"); + LIR_Op2* op2 = (LIR_Op2*)op; + +@@ -626,6 +665,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + do_input(op2->_opr2); + do_temp(op2->_opr2); + do_output(op2->_result); ++#endif break; } -@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { +@@ -1048,6 +1088,12 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } ++#ifdef RISCV +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} ++#endif + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { -@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) +@@ -1084,6 +1130,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) , _file(NULL) , _line(0) #endif @@ -56259,7 +53821,7 @@ index e30d39f73..af54dddf3 100644 { } -@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { +@@ -1101,6 +1151,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { } #endif @@ -56298,45 +53860,34 @@ index e30d39f73..af54dddf3 100644 void LIR_List::append(LIR_InsertionBuffer* buffer) { assert(this == buffer->lir_list(), "wrong lir list"); -@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { - case lir_cmp_l2i: s = "cmp_l2i"; break; - case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; - case lir_cmp_fd2i: s = "comp_fd2i"; break; -- case lir_cmove: s = "cmove"; break; - case lir_add: s = "add"; break; - case lir_sub: s = "sub"; break; - case lir_mul: s = "mul"; break; -@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { - case lir_irem: s = "irem"; break; - case lir_fmad: s = "fmad"; break; - case lir_fmaf: s = "fmaf"; break; -+ // LIR_Op4 -+ case lir_cmove: s = "cmove"; break; - // LIR_OpJavaCall - case lir_static_call: s = "static"; break; - case lir_optvirtual_call: s = "optvirtual"; break; -@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { +@@ -1841,6 +1923,10 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { // LIR_OpBranch void LIR_OpBranch::print_instr(outputStream* out) const { print_condition(out, cond()); out->print(" "); ++#ifdef RISCV + 
in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); ++#endif if (block() != NULL) { out->print("[B%d] ", block()->block_id()); } else if (stub() != NULL) { -@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { +@@ -1927,7 +2013,11 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { -- if (code() == lir_cmove || code() == lir_cmp) { ++#ifdef RISCV + if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { ++#else + if (code() == lir_cmove || code() == lir_cmp) { ++#endif print_condition(out, condition()); out->print(" "); } in_opr1()->print(out); out->print(" "); -@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1978,6 +2068,17 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } ++#ifdef RISCV +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); @@ -56346,68 +53897,95 @@ index e30d39f73..af54dddf3 100644 + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} ++#endif void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp -index 3234ca018..88cd3b24e 100644 +index 3234ca018b..33943e369d 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp -@@ -864,9 +864,11 @@ class LIR_OpConvert; - class LIR_OpAllocObj; - class LIR_OpRoundFP; - class LIR_Op2; --class LIR_OpDelay; -+class LIR_OpBranch; -+class LIR_OpDelay; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -867,6 +867,9 @@ class LIR_Op2; + class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; ++#ifdef RISCV +class LIR_Op4; ++#endif class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; -@@ -916,8 +918,6 @@ enum LIR_Code { +@@ -916,8 +919,10 @@ enum LIR_Code { , lir_null_check , lir_return , lir_leal -- , lir_branch -- , lir_cond_float_branch ++#ifndef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_move , lir_convert , lir_alloc_object -@@ -929,11 +929,12 @@ enum LIR_Code { +@@ -929,11 +934,17 @@ enum LIR_Code { , lir_unwind , end_op1 , begin_op2 ++#ifdef RISCV + , lir_branch + , lir_cond_float_branch ++#endif , lir_cmp , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i -- , lir_cmove ++#ifndef RISCV + , lir_cmove ++#endif , lir_add , lir_sub , lir_mul -@@ -964,6 +965,9 @@ enum LIR_Code { +@@ -964,6 +975,11 @@ enum LIR_Code { , lir_fmad , lir_fmaf , end_op3 ++#ifdef RISCV + , begin_op4 + , lir_cmove + , end_op4 ++#endif , begin_opJavaCall , lir_static_call , lir_optvirtual_call -@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { +@@ -1001,6 +1017,11 @@ enum LIR_Code { + , begin_opAssert + , lir_assert + , end_opAssert ++#if defined(RISCV) && defined(INCLUDE_ZGC) ++ , begin_opZLoadBarrierTest ++ , lir_zloadbarrier_test ++ , end_opZLoadBarrierTest ++#endif + }; + + +@@ -1134,6 +1155,9 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } ++#ifdef RISCV + virtual LIR_Op4* as_Op4() { return NULL; } ++#endif virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } -@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { +@@ -1410,51 +1434,6 @@ class LIR_OpRTCall: public LIR_OpCall { virtual void verify() const; }; @@ -56459,7 +54037,7 @@ index 3234ca018..88cd3b24e 100644 class ConversionStub; class LIR_OpConvert: public LIR_Op1 { -@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { +@@ -1614,19 +1593,19 @@ class LIR_Op2: public LIR_Op { void verify() const; public: @@ -56470,8 +54048,8 @@ index 3234ca018..88cd3b24e 100644 , _opr2(opr2) - , _type(T_ILLEGAL) - , _condition(condition) -+ , _type(type) , _fpu_stack_size(0) ++ , _type(type) , _tmp1(LIR_OprFact::illegalOpr) , _tmp2(LIR_OprFact::illegalOpr) , _tmp3(LIR_OprFact::illegalOpr) @@ -56480,19 +54058,11 @@ index 3234ca018..88cd3b24e 100644 - assert(code == lir_cmp || code == lir_assert, "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(condition) { -+ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); ++ assert(code == lir_cmp || code == lir_assert RISCV_ONLY(|| code == lir_branch || code == lir_cond_float_branch), "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) -@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { - , _opr1(opr1) - , _opr2(opr2) - , _type(type) -- , _condition(condition) - , _fpu_stack_size(0) - , _tmp1(LIR_OprFact::illegalOpr) - , _tmp2(LIR_OprFact::illegalOpr) -@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { +@@ -1651,14 +1630,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(type) @@ -56506,11 +54076,11 @@ index 3234ca018..88cd3b24e 100644 - assert(code != lir_cmp && 
is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(LIR_OprFact::illegalOpr) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, -@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { +@@ -1667,14 +1646,14 @@ class LIR_Op2: public LIR_Op { , _opr1(opr1) , _opr2(opr2) , _type(T_ILLEGAL) @@ -56524,32 +54094,45 @@ index 3234ca018..88cd3b24e 100644 - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + , _tmp5(tmp5) + , _condition(lir_cond_unknown) { -+ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); ++ assert(code != lir_cmp && RISCV_ONLY(code != lir_branch && code != lir_cond_float_branch &&) is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } -@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { +@@ -1686,10 +1665,18 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { -- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++#endif } void set_condition(LIR_Condition condition) { -- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#ifdef RISCV + assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; ++#else + assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++#endif } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } -@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { +@@ -1703,6 +1690,65 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_OpBranch: public LIR_Op2 { ++#else ++class LIR_OpBranch: public LIR_Op { ++#endif + friend class LIR_OpVisitState; + + private: ++#ifndef RISCV ++ LIR_Condition _cond; + BasicType _type; ++#endif + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block @@ -56557,9 +54140,14 @@ index 3234ca018..88cd3b24e 100644 + + public: + LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) -+ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) -+ , _label(lbl) ++#ifdef RISCV ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL, type) ++#else ++ : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _cond(cond) + , _type(type) ++#endif ++ , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , 
_stub(NULL) { } @@ -56570,14 +54158,14 @@ index 3234ca018..88cd3b24e 100644 + // for unordered comparisons + LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); + -+ LIR_Condition cond() const { -+ return condition(); -+ } -+ -+ void set_cond(LIR_Condition cond) { -+ set_condition(cond); -+ } -+ ++#ifdef RISCV ++ LIR_Condition cond() const { return condition(); } ++ void set_cond(LIR_Condition cond) { set_condition(cond); } ++#else ++ LIR_Condition cond() const { return _cond; } ++ void set_cond(LIR_Condition cond) { _cond = cond; } ++#endif ++ BasicType type() const { return _type; } + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } @@ -56595,10 +54183,11 @@ index 3234ca018..88cd3b24e 100644 class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; -@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { +@@ -1766,6 +1812,65 @@ class LIR_Op3: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; ++#ifdef RISCV +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + protected: @@ -56623,12 +54212,12 @@ index 3234ca018..88cd3b24e 100644 + , _opr3(opr3) + , _opr4(opr4) + , _type(type) -+ , _condition(condition) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) -+ , _tmp5(LIR_OprFact::illegalOpr) { ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { + assert(code == lir_cmove, "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } @@ -56656,10 +54245,11 @@ index 3234ca018..88cd3b24e 100644 + + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; ++#endif //-------------------------------- class LabelObj: public CompilationResourceObj { -@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { +@@ -1988,6 +2093,10 @@ class LIR_List: public CompilationResourceObj { const char * _file; int _line; #endif @@ -56670,7 +54260,7 @@ index 3234ca018..88cd3b24e 100644 public: void append(LIR_Op* op) { -@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj { +@@ -2000,6 +2109,12 @@ class LIR_List: public CompilationResourceObj { } #endif // PRODUCT @@ -56683,7 +54273,7 @@ index 3234ca018..88cd3b24e 100644 _operations.append(op); #ifdef ASSERT -@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { +@@ -2016,6 +2131,10 @@ class LIR_List: public CompilationResourceObj { void set_file_and_line(const char * file, int line); #endif @@ -56694,37 +54284,44 @@ index 3234ca018..88cd3b24e 100644 //---------- accessors --------------- LIR_OpList* instructions_list() { return &_operations; } int length() const { return _operations.length(); } -@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { +@@ -2149,9 +2268,16 @@ class LIR_List: public CompilationResourceObj { void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); -- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++#ifdef RISCV + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { + append(new 
LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); ++ } ++#else + void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); } ++#endif void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, + LIR_Opr t1, LIR_Opr t2, LIR_Opr result = LIR_OprFact::illegalOpr); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp -index 160483d5f..42a0350f7 100644 +index 160483d5f7..68aec26c1e 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp -@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -709,9 +709,11 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); break; -- case lir_cmove: -- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); -- break; -- ++#ifndef RISCV + case lir_cmove: + cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); + break; ++#endif + case lir_shl: case lir_shr: - case lir_ushr: -@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { +@@ -776,6 +778,19 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } } ++#ifdef RISCV +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch(op->code()) { + case lir_cmove: @@ -56736,47 +54333,64 @@ index 160483d5f..42a0350f7 100644 + break; + } +} ++#endif void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp -index 44a5bcbe5..406a58d21 100644 +index 44a5bcbe54..baeb4aa442 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp -@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -190,6 +190,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); ++#ifdef RISCV + void emit_op4(LIR_Op4* op); ++#endif void emit_opBranch(LIR_OpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); -@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { +@@ -222,8 +225,12 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); -- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); ++#ifdef RISCV + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, + LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); - ++#else + void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); +- ++#endif void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); + void vtable_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd9..d00bfe91a 100644 +index acc969ac9c..512b63c744 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1242,8 +1242,8 @@ void 
LinearScan::add_register_hints(LIR_Op* op) { +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2005, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -1242,8 +1242,13 @@ void LinearScan::add_register_hints(LIR_Op* op) { break; } case lir_cmove: { -- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); -- LIR_Op2* cmove = (LIR_Op2*)op; ++#ifdef RISCV + assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; ++#else + assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); + LIR_Op2* cmove = (LIR_Op2*)op; ++#endif LIR_Opr move_from = cmove->in_opr1(); LIR_Opr move_to = cmove->result_opr(); -@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { +@@ -3148,6 +3153,9 @@ void LinearScan::do_linear_scan() { } } @@ -56786,7 +54400,7 @@ index c28055fd9..d00bfe91a 100644 { TIME_LINEAR_SCAN(timer_optimize_lir); EdgeMoveOptimizer::optimize(ir()->code()); -@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { +@@ -3155,6 +3163,7 @@ void LinearScan::do_linear_scan() { // check that cfg is still correct after optimizations ir()->verify(); } @@ -56794,60 +54408,41 @@ index c28055fd9..d00bfe91a 100644 NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); -@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6292,14 +6301,23 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { // There might be a cmove inserted for profiling which depends on the same // compare. If we change the condition of the respective compare, we have // to take care of this cmove as well. 
-- LIR_Op2* prev_cmove = NULL; ++#ifdef RISCV + LIR_Op4* prev_cmove = NULL; ++#else + LIR_Op2* prev_cmove = NULL; ++#endif for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { prev_op = instructions->at(j); // check for the cmove if (prev_op->code() == lir_cmove) { -- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); -- prev_cmove = (LIR_Op2*)prev_op; ++#ifdef RISCV + assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); + prev_cmove = (LIR_Op4*)prev_op; ++#else + assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); + prev_cmove = (LIR_Op2*)prev_op; ++#endif assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); } if (prev_op->code() == lir_cmp) { -diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp -index 19fe196bc..d9cb8e999 100644 ---- a/src/hotspot/share/classfile/vmSymbols.cpp -+++ b/src/hotspot/share/classfile/vmSymbols.cpp -@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_compareToL: - case vmIntrinsics::_compareToU: - case vmIntrinsics::_compareToLU: -@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - if (!SpecialStringIndexOf) return true; - break; - case vmIntrinsics::_equalsL: -diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp -index cef3f530c..a31525003 100644 ---- a/src/hotspot/share/classfile/vmSymbols.hpp -+++ b/src/hotspot/share/classfile/vmSymbols.hpp -@@ -946,6 +946,7 @@ - do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ - do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ -+ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ - do_name( indexOf_name, "indexOf") \ - do_name( indexOfChar_name, "indexOfChar") \ - do_name( indexOfUL_name, "indexOfLatin1") \ diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp -index 4771a8b86..295f82ccc 100644 +index 4771a8b865..6d377fa005 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, 2021, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2022, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -31,7 +31,7 @@ #include "utilities/defaultStream.hpp" @@ -56857,8 +54452,31 @@ index 4771a8b86..295f82ccc 100644 vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif +diff --git a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +index 9f8ce74243..125cc169be 100644 +--- a/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp ++++ b/src/hotspot/share/gc/z/c1/zBarrierSetC1.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2019, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -100,7 +100,11 @@ private: + + public: + LIR_OpZLoadBarrierTest(LIR_Opr opr) : ++#ifdef RISCV ++ LIR_Op(lir_zloadbarrier_test, LIR_OprFact::illegalOpr, NULL), ++#else + LIR_Op(), ++#endif + _opr(opr) {} + + virtual void visit(LIR_OpVisitState* state) { diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp -index e01a242a5..ff16de0e7 100644 +index e01a242a57..ff16de0e77 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { @@ -56870,1038 +54488,11 @@ index e01a242a5..ff16de0e7 100644 return false; #else #warning "Unconfigured platform" -diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp -index 7768615b7..ef006f087 100644 ---- a/src/hotspot/share/opto/c2compiler.cpp -+++ b/src/hotspot/share/opto/c2compiler.cpp -@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt - case vmIntrinsics::_indexOfIU: - case vmIntrinsics::_indexOfIUL: - case vmIntrinsics::_indexOfU_char: -+ case vmIntrinsics::_indexOfL_char: - case vmIntrinsics::_toBytesStringU: - case vmIntrinsics::_getCharsStringU: - case vmIntrinsics::_getCharStringU: -diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp -index 500054218..fafbde78d 100644 ---- a/src/hotspot/share/opto/chaitin.cpp -+++ b/src/hotspot/share/opto/chaitin.cpp -@@ -77,6 +77,7 @@ void LRG::dump() const { - if( _is_oop ) tty->print("Oop "); - if( _is_float ) tty->print("Float "); - if( _is_vector ) tty->print("Vector "); -+ if( _is_scalable ) tty->print("Scalable "); - if( _was_spilled1 ) tty->print("Spilled "); - if( _was_spilled2 ) tty->print("Spilled2 "); - if( _direct_conflict ) tty->print("Direct_conflict "); -@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { - - // Merge multidefs if multiple defs representing the same value are used in a single block. - merge_multidefs(); -+ merge_debugdefs(); - - #ifdef ASSERT - // Veify the graph after RA. -@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { - // Live ranges record the highest register in their mask. - // We want the low register for the AD file writer's convenience. - OptoReg::Name hi = lrg.reg(); // Get hi register -- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo -+ int num_regs = lrg.num_regs(); -+ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { -+ // For scalable vector registers, when they are allocated in physical -+ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable -+ // vector. If they are allocated on stack, we need to get the actual -+ // num_regs, which reflects the physical length of scalable registers. -+ num_regs = lrg.scalable_reg_slots(); -+ } -+ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo - // We have to use pair [lo,lo+1] even for wide vectors because - // the rest of code generation works only with pairs. It is safe - // since for registers encoding only 'lo' is used. -@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - // Check for vector live range (only if vector register is used). 
- // On SPARC vector uses RegD which could be misaligned so it is not - // processes as vector in RA. -- if (RegMask::is_vector(ireg)) -+ if (RegMask::is_vector(ireg)) { - lrg._is_vector = 1; -+ if (ireg == Op_VecA) { -+ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); -+ lrg._is_scalable = 1; -+ // For scalable vector, when it is allocated in physical register, -+ // num_regs is RegMask::SlotsPerVecA for reg mask, -+ // which may not be the actual physical register size. -+ // If it is allocated in stack, we need to get the actual -+ // physical length of scalable vector register. -+ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); -+ } -+ } - assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, - "vector must be in vector registers"); - -@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { - lrg.set_reg_pressure(1); - #endif - break; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); -+ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); -+ lrg.set_num_regs(RegMask::SlotsPerVecA); -+ lrg.set_reg_pressure(1); -+ break; - case Op_VecS: - assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); - assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); -@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { - return false; - } - -+static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { -+ int num_regs = lrg.num_regs(); -+ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); -+ -+ if (lrg.is_scalable()) { -+ // a physical register is found -+ if (chunk == 0 && OptoReg::is_reg(assigned)) { -+ return assigned; -+ } -+ -+ // find available stack slots for scalable register -+ if (lrg._is_vector) { -+ num_regs = lrg.scalable_reg_slots(); -+ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits -+ if (num_regs == RegMask::SlotsPerVecA) { -+ return assigned; -+ } -+ -+ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it -+ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits -+ // instead of SlotsPerVecA bits. -+ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg -+ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { -+ // Verify the found reg has scalable_reg_slots() bits set. 
-+ if (mask.is_valid_reg(assigned, num_regs)) { -+ return assigned; -+ } else { -+ // Remove more for each iteration -+ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg -+ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits -+ assigned = mask.find_first_set(lrg, num_regs); -+ } -+ } -+ return OptoReg::Bad; // will cause chunk change, and retry next chunk -+ } -+ } -+ -+ return assigned; -+} -+ -+ - // Choose a color using the biasing heuristic - OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - -@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - RegMask tempmask = lrg.mask(); - tempmask.AND(lrgs(copy_lrg).mask()); - tempmask.clear_to_sets(lrg.num_regs()); -- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); -+ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); - if (OptoReg::is_valid(reg)) - return reg; - } -@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { - // If no bias info exists, just go with the register selection ordering - if (lrg._is_vector || lrg.num_regs() == 2) { - // Find an aligned set -- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); -+ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); - } - - // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate -@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { - int n_regs = lrg->num_regs(); - assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); - if (n_regs == 1 || !lrg->_fat_proj) { -- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ if (Matcher::supports_scalable_vector()) { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); -+ } else { -+ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); -+ } - lrg->Clear(); // Clear the mask - lrg->Insert(reg); // Set regmask to match selected reg - // For vectors and pairs, also insert the low bit of the pair -- for (int i = 1; i < n_regs; i++) -+ // We always choose the high bit, then mask the low bits by register size -+ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack -+ n_regs = lrg->scalable_reg_slots(); -+ } -+ for (int i = 1; i < n_regs; i++) { - lrg->Insert(OptoReg::add(reg,-i)); -+ } - lrg->set_mask_size(n_regs); - } else { // Else fatproj - // mask must be equal to fatproj bits, by definition -diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp -index e5be5b966..b5d1b0604 100644 ---- a/src/hotspot/share/opto/chaitin.hpp -+++ b/src/hotspot/share/opto/chaitin.hpp -@@ -115,9 +115,11 @@ public: - _msize_valid=1; - if (_is_vector) { - assert(!_fat_proj, "sanity"); -- _mask.verify_sets(_num_regs); -+ if (!(_is_scalable && OptoReg::is_stack(_reg))) { -+ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); -+ } - } else if (_num_regs == 2 && !_fat_proj) { -- _mask.verify_pairs(); -+ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); - } - #endif - } -@@ -143,10 +145,34 @@ public: - private: - uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else - // except _num_regs is kill count for fat_proj -+ -+ // For scalable register, num_regs may not be the actual physical register size. -+ // We need to get the actual physical length of scalable register when scalable -+ // register is spilled. The size of one slot is 32-bit. -+ uint _scalable_reg_slots; // Actual scalable register length of slots. 
-+ // Meaningful only when _is_scalable is true. - public: - int num_regs() const { return _num_regs; } - void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } - -+ uint scalable_reg_slots() { return _scalable_reg_slots; } -+ void set_scalable_reg_slots(uint slots) { -+ assert(_is_scalable, "scalable register"); -+ assert(slots > 0, "slots of scalable register is not valid"); -+ _scalable_reg_slots = slots; -+ } -+ -+ bool is_scalable() { -+#ifdef ASSERT -+ if (_is_scalable) { -+ // Should only be a vector for now, but it could also be a RegVMask in future. -+ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); -+ } -+#endif -+ return _is_scalable; -+ } -+ -+ - private: - // Number of physical registers this live range uses when it colors - // Architecture and register-set dependent -@@ -172,6 +198,7 @@ public: - uint _is_oop:1, // Live-range holds an oop - _is_float:1, // True if in float registers - _is_vector:1, // True if in vector registers -+ _is_scalable:1, // True if register size is scalable - _was_spilled1:1, // True if prior spilling on def - _was_spilled2:1, // True if twice prior spilling on def - _is_bound:1, // live range starts life with no -@@ -756,6 +783,7 @@ private: - - // Merge nodes that are a part of a multidef lrg and produce the same value within a block. - void merge_multidefs(); -+ void merge_debugdefs(); - - private: - -diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp -index c0dfe1b0c..2d9526a39 100644 ---- a/src/hotspot/share/opto/intrinsicnode.hpp -+++ b/src/hotspot/share/opto/intrinsicnode.hpp -@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { - // Base class for Ideal nodes used in String intrinsic code. - class StrIntrinsicNode: public Node { - public: -- // Possible encodings of the two parameters passed to the string intrinsic. -+ // Possible encodings of the parameters passed to the string intrinsic. - // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that - // the first string is Latin1 encoded and the second string is UTF16 encoded. -- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; -+ // 'L' means that the single string is Latin1 encoded -+ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; - - protected: - // Encoding of strings. Used to select the right version of the intrinsic. 
-diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp -index 6b6aa9e9b..8719c5b12 100644 ---- a/src/hotspot/share/opto/library_call.cpp -+++ b/src/hotspot/share/opto/library_call.cpp -@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { - bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); - Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, - RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); -- bool inline_string_indexOfChar(); -+ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); - bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); - bool inline_string_toBytesU(); - bool inline_string_getCharsU(); -@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { - case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); - case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); - case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); -- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); -+ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); -+ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); - - case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); - case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); -@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* - } - - //-----------------------------inline_string_indexOfChar----------------------- --bool LibraryCallKit::inline_string_indexOfChar() { -+bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { - if (too_many_traps(Deoptimization::Reason_intrinsic)) { - return false; - } -@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { - - src = must_be_not_null(src, true); - -- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); -+ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); - Node* src_start = array_element_address(src, src_offset, T_BYTE); - Node* src_count = _gvn.transform(new SubINode(max, from_index)); - - // Range checks -- generate_string_range_check(src, src_offset, src_count, true); -+ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); - if (stopped()) { - return true; - } -@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { - RegionNode* region = new RegionNode(3); - Node* phi = new PhiNode(region, TypeInt::INT); - -- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); -+ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); - C->set_has_split_ifs(true); // Has chance for split-if optimization - _gvn.transform(result); - -diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp -index 8d526b15d..92b4f7158 100644 ---- a/src/hotspot/share/opto/machnode.cpp -+++ b/src/hotspot/share/opto/machnode.cpp -@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { - return MachNode::emit_size(ra_); - } - --//------------------------------size------------------------------------------- -+//-------------------------emit_size------------------------------------------- - // Helper function that computes size by emitting code - uint MachNode::emit_size(PhaseRegAlloc *ra_) const { - // Emit into a trash buffer and count bytes emitted. -diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index a52325680..dad70565b 100644 ---- a/src/hotspot/share/opto/machnode.hpp -+++ b/src/hotspot/share/opto/machnode.hpp -@@ -334,6 +334,10 @@ public: - // Top-level ideal Opcode matched - virtual int ideal_Opcode() const { return Op_Node; } - -+ virtual bool is_Opcode_equal(Node* node) { -+ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); -+ } -+ - // Adds the label for the case - virtual void add_case_label( int switch_val, Label* blockLabel); - -diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 9e9b3383f..97de5e314 100644 ---- a/src/hotspot/share/opto/matcher.cpp -+++ b/src/hotspot/share/opto/matcher.cpp -@@ -84,6 +84,7 @@ Matcher::Matcher() - idealreg2spillmask [Op_RegF] = NULL; - idealreg2spillmask [Op_RegD] = NULL; - idealreg2spillmask [Op_RegP] = NULL; -+ idealreg2spillmask [Op_VecA] = NULL; - idealreg2spillmask [Op_VecS] = NULL; - idealreg2spillmask [Op_VecD] = NULL; - idealreg2spillmask [Op_VecX] = NULL; -@@ -110,6 +111,7 @@ Matcher::Matcher() - idealreg2mhdebugmask[Op_RegF] = NULL; - idealreg2mhdebugmask[Op_RegD] = NULL; - idealreg2mhdebugmask[Op_RegP] = NULL; -+ idealreg2mhdebugmask[Op_VecA] = NULL; - idealreg2mhdebugmask[Op_VecS] = NULL; - idealreg2mhdebugmask[Op_VecD] = NULL; - idealreg2mhdebugmask[Op_VecX] = NULL; -@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { - void Matcher::init_first_stack_mask() { - - // Allocate storage for spill masks as masks for the appropriate load type. 
-- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); -+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); - - idealreg2spillmask [Op_RegN] = &rms[0]; - idealreg2spillmask [Op_RegI] = &rms[1]; -@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { - idealreg2mhdebugmask[Op_RegD] = &rms[16]; - idealreg2mhdebugmask[Op_RegP] = &rms[17]; - -- idealreg2spillmask [Op_VecS] = &rms[18]; -- idealreg2spillmask [Op_VecD] = &rms[19]; -- idealreg2spillmask [Op_VecX] = &rms[20]; -- idealreg2spillmask [Op_VecY] = &rms[21]; -- idealreg2spillmask [Op_VecZ] = &rms[22]; -+ idealreg2spillmask [Op_VecA] = &rms[18]; -+ idealreg2spillmask [Op_VecS] = &rms[19]; -+ idealreg2spillmask [Op_VecD] = &rms[20]; -+ idealreg2spillmask [Op_VecX] = &rms[21]; -+ idealreg2spillmask [Op_VecY] = &rms[22]; -+ idealreg2spillmask [Op_VecZ] = &rms[23]; - - OptoReg::Name i; - -@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { - // Keep spill masks aligned. - aligned_stack_mask.clear_to_pairs(); - assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); -+ RegMask scalable_stack_mask = aligned_stack_mask; - - *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; - #ifdef _LP64 -@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { - *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; - idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); - } -+ -+ if (Matcher::supports_scalable_vector()) { -+ int k = 1; -+ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); -+ // Exclude last input arg stack slots to avoid spilling vector register there, -+ // otherwise vector spills could stomp over stack slots in caller frame. -+ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { -+ scalable_stack_mask.Remove(in); -+ in = OptoReg::add(in, -1); -+ } -+ -+ // For VecA -+ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); -+ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); -+ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; -+ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); -+ } else { -+ *idealreg2spillmask[Op_VecA] = RegMask::Empty; -+ } -+ - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. -@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { - idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); - - // Vector regmasks. -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; -+ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); -+ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); -+ } - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE, 4); - MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); -diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index 244e3d1f8..9a8307102 100644 ---- a/src/hotspot/share/opto/matcher.hpp -+++ b/src/hotspot/share/opto/matcher.hpp -@@ -310,7 +310,7 @@ public: - - // identify extra cases that we might want to provide match rules for - // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- static const bool match_rule_supported_vector(int opcode, int vlen); -+ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); - - // Some microarchitectures have mask registers used on vectors - static const bool has_predicated_vectors(void); -@@ -333,6 +333,10 @@ public: - Matcher::min_vector_size(bt) <= size); - } - -+ static const bool supports_scalable_vector(); -+ // Actual max scalable vector register length. -+ static const int scalable_vector_reg_size(const BasicType bt); -+ - // Vector ideal reg - static const uint vector_ideal_reg(int len); - static const uint vector_shift_count_ideal_reg(int len); -diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index 02bb6bb16..99d51ba05 100644 ---- a/src/hotspot/share/opto/node.cpp -+++ b/src/hotspot/share/opto/node.cpp -@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { - return NULL; - } - -+//--------------------------is_similar----------------------------------- -+// True if a node has the same opcode and inputs as "this". -+bool Node::is_similar(Node* node) { -+ if (this == node) { -+ return true; -+ } else { -+ if (is_Opcode_equal(node) && (req() == node->req())) { -+ for (uint i = 0; i < node->req(); i++) { -+ if (in(i) != node->in(i)) { -+ return false; -+ } -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ -+bool Node::is_Opcode_equal(Node* node) { -+ return Opcode() == node->Opcode(); -+} - - //--------------------------unique_ctrl_out------------------------------ - // Return the unique control out if only one. Null if none or more than one. -diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 0c0b9bf69..e24456d85 100644 ---- a/src/hotspot/share/opto/node.hpp -+++ b/src/hotspot/share/opto/node.hpp -@@ -1030,6 +1030,11 @@ public: - // be found; Otherwise return NULL; - Node* find_similar(int opc); - -+ // True if a node has the same opcode and inputs as "this". -+ bool is_similar(Node* node); -+ -+ virtual bool is_Opcode_equal(Node* node); -+ - // Return the unique control out if only one. Null if none or more than one. 
- Node* unique_ctrl_out() const; - -diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp -index e31e8d847..aa0483c73 100644 ---- a/src/hotspot/share/opto/opcodes.cpp -+++ b/src/hotspot/share/opto/opcodes.cpp -@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { - "RegF", - "RegD", - "RegL", -- "RegFlags", -+ "VecA", - "VecS", - "VecD", - "VecX", - "VecY", - "VecZ", -+ "RegVMask", -+ "RegFlags", - "_last_machine_leaf", - #include "classes.hpp" - "_last_class_name", -diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp -index ae3d61ce0..0a77c3732 100644 ---- a/src/hotspot/share/opto/opcodes.hpp -+++ b/src/hotspot/share/opto/opcodes.hpp -@@ -37,11 +37,13 @@ enum Opcodes { - macro(RegF) // Machine float register - macro(RegD) // Machine double register - macro(RegL) // Machine long register -+ macro(VecA) // Machine vectora register - macro(VecS) // Machine vectors register - macro(VecD) // Machine vectord register - macro(VecX) // Machine vectorx register - macro(VecY) // Machine vectory register - macro(VecZ) // Machine vectorz register -+ macro(RegVMask) // Vector mask/predicate register - macro(RegFlags) // Machine flags register - _last_machine_leaf, // Split between regular opcodes and machine - #include "classes.hpp" -diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp -index 397a53713..89c7fc7c8 100644 ---- a/src/hotspot/share/opto/phase.cpp -+++ b/src/hotspot/share/opto/phase.cpp -@@ -113,6 +113,7 @@ void Phase::print_timers() { - tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); - tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); - tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); -+ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); - tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); - tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); - tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); -@@ -130,6 +131,7 @@ void Phase::print_timers() { - timers[_t_regAllocSplit].seconds() + - timers[_t_postAllocCopyRemoval].seconds() + - timers[_t_mergeMultidefs].seconds() + -+ timers[_t_mergeDebugdefs].seconds() + - timers[_t_fixupSpills].seconds() + - timers[_t_chaitinCompact].seconds() + - timers[_t_chaitinCoalesce1].seconds() + -diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp -index 4b0c53ffc..b3302ec86 100644 ---- a/src/hotspot/share/opto/phase.hpp -+++ b/src/hotspot/share/opto/phase.hpp -@@ -91,6 +91,7 @@ public: - _t_regAllocSplit, - _t_postAllocCopyRemoval, - _t_mergeMultidefs, -+ _t_mergeDebugdefs, - _t_fixupSpills, - _t_chaitinCompact, - _t_chaitinCoalesce1, -diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp -index 46766b604..3f608bb40 100644 ---- a/src/hotspot/share/opto/postaloc.cpp -+++ b/src/hotspot/share/opto/postaloc.cpp -@@ -27,6 +27,7 @@ - #include "memory/resourceArea.hpp" - #include "opto/chaitin.hpp" - #include "opto/machnode.hpp" -+#include "opto/addnode.hpp" - - // See if this register (or pairs, or vector) already contains the value. - static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, -@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - Node *val = skip_copies(n->in(k)); - if (val == x) return blk_adjust; // No progress? 
- -- int n_regs = RegMask::num_registers(val->ideal_reg()); - uint val_idx = _lrg_map.live_range_id(val); - OptoReg::Name val_reg = lrgs(val_idx).reg(); -+ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); - - // See if it happens to already be in the correct register! - // (either Phi's direct register, or the common case of the name -@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v - } - - Node *vv = value[reg]; -+ // For scalable register, number of registers may be inconsistent between -+ // "val_reg" and "reg". For example, when "val" resides in register -+ // but "reg" is located in stack. -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ if (OptoReg::is_stack(reg)) { -+ n_regs = lrgs(val_idx).scalable_reg_slots(); -+ } else { -+ n_regs = RegMask::SlotsPerVecA; -+ } -+ } - if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set -- uint last = (n_regs-1); // Looking for the last part of a set -+ uint last; -+ if (lrgs(val_idx).is_scalable()) { -+ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); -+ // For scalable vector register, regmask is always SlotsPerVecA bits aligned -+ last = RegMask::SlotsPerVecA - 1; -+ } else { -+ last = (n_regs-1); // Looking for the last part of a set -+ } - if ((reg&last) != last) continue; // Wrong part of a set - if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value - } -@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { - } - } - -+void PhaseChaitin::merge_debugdefs() { -+ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); -+ -+ ResourceMark rm; -+ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { -+ Block* block = _cfg.get_block(i); -+ for (int j = 0; j < (int) block->number_of_nodes(); j++) { -+ Node* base = block->get_node(j); -+ if (base && base->is_Mach() && base->outcnt() == 1) { -+ Node* addp = base->unique_out(); -+ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { -+ Node* derived = addp->in(AddPNode::Address); -+ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { -+ base->subsume_by(derived, Compile::current()); -+ block->remove_node(j--); -+ } -+ } -+ } -+ } -+ } -+} -+ - int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { - int blk_adjust = 0; - -@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - uint k; - Node *phi = block->get_node(j); - uint pidx = _lrg_map.live_range_id(phi); -- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); -+ OptoReg::Name preg = lrgs(pidx).reg(); - - // Remove copies remaining on edges. Check for junk phi. 
- Node *u = NULL; -@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - if( pidx ) { - value.map(preg,phi); - regnd.map(preg,phi); -- int n_regs = RegMask::num_registers(phi->ideal_reg()); -+ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name preg_lo = OptoReg::add(preg,-l); - value.map(preg_lo,phi); -@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - regnd.map(ureg, def); - // Record other half of doubles - uint def_ideal_reg = def->ideal_reg(); -- int n_regs = RegMask::num_registers(def_ideal_reg); -+ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); - for (int l = 1; l < n_regs; l++) { - OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); - if (!value[ureg_lo] && -@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { - } - - uint n_ideal_reg = n->ideal_reg(); -- int n_regs = RegMask::num_registers(n_ideal_reg); -+ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); - if (n_regs == 1) { - // If Node 'n' does not change the value mapped by the register, - // then 'n' is a useless copy. Do not update the register->node -diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp -index 2e04c42eb..34a701e84 100644 ---- a/src/hotspot/share/opto/regmask.cpp -+++ b/src/hotspot/share/opto/regmask.cpp -@@ -24,6 +24,7 @@ - - #include "precompiled.hpp" - #include "opto/ad.hpp" -+#include "opto/chaitin.hpp" - #include "opto/compile.hpp" - #include "opto/matcher.hpp" - #include "opto/node.hpp" -@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( - - //============================================================================= - bool RegMask::is_vector(uint ireg) { -- return (ireg == Op_VecS || ireg == Op_VecD || -+ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || - ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); - } - - int RegMask::num_registers(uint ireg) { - switch(ireg) { - case Op_VecZ: -- return 16; -+ return SlotsPerVecZ; - case Op_VecY: -- return 8; -+ return SlotsPerVecY; - case Op_VecX: -- return 4; -+ return SlotsPerVecX; - case Op_VecD: -+ return SlotsPerVecD; - case Op_RegD: - case Op_RegL: - #ifdef _LP64 - case Op_RegP: - #endif - return 2; -+ case Op_VecA: -+ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); -+ return SlotsPerVecA; - } - // Op_VecS and the rest ideal registers. - return 1; - } - -+int RegMask::num_registers(uint ireg, LRG &lrg) { -+ int n_regs = num_registers(ireg); -+ -+ // assigned is OptoReg which is selected by register allocator -+ OptoReg::Name assigned = lrg.reg(); -+ assert(OptoReg::is_valid(assigned), "should be valid opto register"); -+ -+ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { -+ n_regs = lrg.scalable_reg_slots(); -+ } -+ return n_regs; -+} -+ - //------------------------------find_first_pair-------------------------------- - // Find the lowest-numbered register pair in the mask. Return the - // HIGHEST register number in the pair, or BAD if no pairs. -@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { - return true; - } - -+// Check that whether given reg number with size is valid -+// for current regmask, where reg is the highest number. 
-+bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { -+ for (int i = 0; i < size; i++) { -+ if (!Member(reg - i)) { -+ return false; -+ } -+ } -+ return true; -+} -+ - // only indicies of power 2 are accessed, so index 3 is only filled in for storage. - static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; - //------------------------------find_first_set--------------------------------- - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Works also for size 1. --OptoReg::Name RegMask::find_first_set(const int size) const { -- verify_sets(size); -+OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { -+ if (lrg.is_scalable()) { -+ // For scalable vector register, regmask is SlotsPerVecA bits aligned. -+ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); -+ } else { -+ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); -+ } - for (int i = 0; i < RM_SIZE; i++) { - if (_A[i]) { // Found some bits - int bit = _A[i] & -_A[i]; // Extract low bit -diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp -index c64d08795..2688275be 100644 ---- a/src/hotspot/share/opto/regmask.hpp -+++ b/src/hotspot/share/opto/regmask.hpp -@@ -28,6 +28,8 @@ - #include "code/vmreg.hpp" - #include "opto/optoreg.hpp" - -+class LRG; -+ - // Some fun naming (textual) substitutions: - // - // RegMask::get_low_elem() ==> RegMask::find_first_elem() -@@ -95,6 +97,7 @@ public: - // requirement is internal to the allocator, and independent of any - // particular platform. - enum { SlotsPerLong = 2, -+ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), - SlotsPerVecS = 1, - SlotsPerVecD = 2, - SlotsPerVecX = 4, -@@ -204,10 +207,14 @@ public: - return false; - } - -+ // Check that whether given reg number with size is valid -+ // for current regmask, where reg is the highest number. -+ bool is_valid_reg(OptoReg::Name reg, const int size) const; -+ - // Find the lowest-numbered register set in the mask. Return the - // HIGHEST register number in the set, or BAD if no sets. - // Assert that the mask contains only bit sets. -- OptoReg::Name find_first_set(const int size) const; -+ OptoReg::Name find_first_set(LRG &lrg, const int size) const; - - // Clear out partial bits; leave only aligned adjacent bit sets of size. - void clear_to_sets(const int size); -@@ -226,6 +233,7 @@ public: - - static bool is_vector(uint ireg); - static int num_registers(uint ireg); -+ static int num_registers(uint ireg, LRG &lrg); - - // Fast overlap test. Non-zero if any registers in common. - int overlap( const RegMask &rm ) const { -diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index fed52e488..ee583236f 100644 ---- a/src/hotspot/share/opto/superword.cpp -+++ b/src/hotspot/share/opto/superword.cpp -@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz - //------------------------------transform_loop--------------------------- - void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { - assert(UseSuperWord, "should be"); -- // Do vectors exist on this architecture? -- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; -+ // SuperWord only works with power of two vector sizes. 
-+ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); -+ if (vector_width < 2 || !is_power_of_2(vector_width)) { -+ return; -+ } - - assert(lpt->_head->is_CountedLoop(), "must be"); - CountedLoopNode *cl = lpt->_head->as_CountedLoop(); -diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 7d767c47c..c9948df5f 100644 ---- a/src/hotspot/share/opto/type.cpp -+++ b/src/hotspot/share/opto/type.cpp -@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { - { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY - { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ - #else // all other -+ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA - { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS - { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD - { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX -@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { - // get_zero_type() should not happen for T_CONFLICT - _zero_type[T_CONFLICT]= NULL; - -+ if (Matcher::supports_scalable_vector()) { -+ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); -+ } -+ - // Vector predefined types, it needs initialized _const_basic_type[]. - if (Matcher::vector_size_supported(T_BYTE,4)) { - TypeVect::VECTS = TypeVect::make(T_BYTE,4); -@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { - if (Matcher::vector_size_supported(T_FLOAT,16)) { - TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); - } -+ mreg2type[Op_VecA] = TypeVect::VECTA; - mreg2type[Op_VecS] = TypeVect::VECTS; - mreg2type[Op_VecD] = TypeVect::VECTD; - mreg2type[Op_VecX] = TypeVect::VECTX; -@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { - - Bad, // Tuple - handled in v-call - Bad, // Array - handled in v-call -+ Bad, // VectorA - handled in v-call - Bad, // VectorS - handled in v-call - Bad, // VectorD - handled in v-call - Bad, // VectorX - handled in v-call -@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { - - //==============================TypeVect======================================= - // Convenience common pre-built types. 
-+const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic - const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors - const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors - const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors -@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors - const TypeVect* TypeVect::make(const Type *elem, uint length) { - BasicType elem_bt = elem->array_element_basic_type(); - assert(is_java_primitive(elem_bt), "only primitive types in vector"); -- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); - assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); - int size = length * type2aelembytes(elem_bt); - switch (Matcher::vector_ideal_reg(size)) { -+ case Op_VecA: -+ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); - case Op_VecS: - return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); - case Op_RegL: -@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { - default: // All else is a mistake - typerr(t); - -+ case VectorA: - case VectorS: - case VectorD: - case VectorX: -@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { - #ifndef PRODUCT - void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { - switch (base()) { -+ case VectorA: -+ st->print("vectora["); break; - case VectorS: - st->print("vectors["); break; - case VectorD: -diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp -index 27d042d94..82ee2dfcb 100644 ---- a/src/hotspot/share/opto/type.hpp -+++ b/src/hotspot/share/opto/type.hpp -@@ -53,6 +53,7 @@ class TypeNarrowKlass; - class TypeAry; - class TypeTuple; - class TypeVect; -+class TypeVectA; - class TypeVectS; - class TypeVectD; - class TypeVectX; -@@ -87,6 +88,7 @@ public: - - Tuple, // Method signature or object layout - Array, // Array types -+ VectorA, // (Scalable) Vector types for vector length agnostic - VectorS, // 32bit Vector types - VectorD, // 64bit Vector types - VectorX, // 128bit Vector types -@@ -769,6 +771,7 @@ public: - virtual const Type *xmeet( const Type *t) const; - virtual const Type *xdual() const; // Compute dual right now. - -+ static const TypeVect *VECTA; - static const TypeVect *VECTS; - static const TypeVect *VECTD; - static const TypeVect *VECTX; -@@ -780,6 +783,11 @@ public: - #endif - }; - -+class TypeVectA : public TypeVect { -+ friend class TypeVect; -+ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} -+}; -+ - class TypeVectS : public TypeVect { - friend class TypeVect; - TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} -@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { - } - - inline const TypeVect *Type::is_vect() const { -- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); -+ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); - return (TypeVect*)this; - } - - inline const TypeVect *Type::isa_vect() const { -- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; -+ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; - } - - inline const TypePtr *Type::is_ptr() const { -diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index de22591ba..b82d631f4 100644 ---- a/src/hotspot/share/opto/vectornode.cpp -+++ b/src/hotspot/share/opto/vectornode.cpp -@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = VectorNode::opcode(opc, bt); -- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); -+ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } -@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { - (vlen > 1) && is_power_of_2(vlen) && - Matcher::vector_size_supported(bt, vlen)) { - int vopc = ReductionNode::opcode(opc, bt); -- return vopc != opc && Matcher::match_rule_supported(vopc); -+ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); - } - return false; - } diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp -index c46247f2b..ee769634f 100644 +index a383297611..5e9228e705 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.cpp +++ b/src/hotspot/share/runtime/abstract_vm_version.cpp -@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; - #ifdef ZERO - #define VMTYPE "Zero" - #else // ZERO -- #define VMTYPE COMPILER1_PRESENT("Client") \ -- COMPILER2_PRESENT("Server") -+ #ifdef COMPILER2 -+ #define VMTYPE "Server" -+ #elif defined(COMPILER1) -+ #define VMTYPE "Client" -+ #else -+ #define VMTYPE "Core" -+ #endif // COMPILER2 - #endif // ZERO - #endif // TIERED - #endif -@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { +@@ -196,7 +196,8 @@ const char* Abstract_VM_Version::jre_release_version() { IA32_ONLY("x86") \ IA64_ONLY("ia64") \ S390_ONLY("s390") \ @@ -57912,10 +54503,10 @@ index c46247f2b..ee769634f 100644 #endif // !CPU diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp -index 0a9c45f85..a96c2dd81 100644 +index 34c8d98362..7cf95058fe 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp -@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { +@@ -1259,7 +1259,7 @@ class JavaThread: public Thread { address last_Java_pc(void) { return _anchor.last_Java_pc(); } // Safepoint support @@ -57925,35 +54516,30 @@ index 0a9c45f85..a96c2dd81 100644 void set_thread_state(JavaThreadState s) { assert(current_or_null() == NULL || current_or_null() == this, diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp -index dee8534f7..aa71d7655 100644 +index dee8534f73..9af07aeb45 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * @@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { set_has_async_exception(); } -#if defined(PPC64) || defined (AARCH64) -+#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) ++#if defined(PPC64) || defined (AARCH64) || defined(RISCV64) inline JavaThreadState JavaThread::thread_state() const { return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); } -diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp -index 0b898dcc3..7f76486ae 100644 ---- a/src/hotspot/share/utilities/debug.cpp -+++ b/src/hotspot/share/utilities/debug.cpp -@@ -632,6 +632,7 @@ void help() { - tty->print_cr(" pns($sp, $rbp, $pc) on Linux/amd64 and Solaris/amd64 or"); - tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); - tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); -+ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); - tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); - tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); - tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf8025386..e8ab3097a 100644 +index 6605ab367c..7f1bcff6b3 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp -@@ -597,6 +597,32 @@ +@@ -601,6 +601,32 @@ #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) @@ -57986,26 +54572,17 @@ index cf8025386..e8ab3097a 100644 #ifdef VM_LITTLE_ENDIAN #define LITTLE_ENDIAN_ONLY(code) code #define BIG_ENDIAN_ONLY(code) -diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java -index 063a5ef3a..50e9cdb57 100644 ---- a/src/java.base/share/classes/java/lang/StringLatin1.java -+++ b/src/java.base/share/classes/java/lang/StringLatin1.java -@@ -209,6 +209,11 @@ final class StringLatin1 { - // Note: fromIndex might be near -1>>>1. - return -1; - } -+ return indexOfChar(value, ch, fromIndex, max); -+ } -+ -+ @HotSpotIntrinsicCandidate -+ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { - byte c = (byte)ch; - for (int i = fromIndex; i < max; i++) { - if (value[i] == c) { diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c..55a7b96f7 100644 +index 0d834302c5..45a927fb5e 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58,6 +58,10 @@ #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif @@ -58022,71 +54599,76 @@ index 0d834302c..55a7b96f7 100644 } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) -+#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { -@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo - #ifdef aarch64 - #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG +@@ -425,6 +429,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG #endif +#ifdef riscv64 +#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG +#endif - #if defined(sparc) || defined(sparcv9) - #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG #endif -@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo +@@ -534,6 +541,44 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(riscv64) ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; + -+#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg -+ -+ { -+ regs[REG_INDEX(PC)] = gregs.pc; -+ regs[REG_INDEX(LR)] = gregs.ra; -+ regs[REG_INDEX(SP)] = gregs.sp; -+ regs[REG_INDEX(R3)] = gregs.gp; -+ regs[REG_INDEX(R4)] = gregs.tp; -+ regs[REG_INDEX(R5)] = gregs.t0; -+ regs[REG_INDEX(R6)] = gregs.t1; -+ regs[REG_INDEX(R7)] = gregs.t2; -+ regs[REG_INDEX(R8)] = gregs.s0; -+ regs[REG_INDEX(R9)] = gregs.s1; -+ regs[REG_INDEX(R10)] = 
gregs.a0; -+ regs[REG_INDEX(R11)] = gregs.a1; -+ regs[REG_INDEX(R12)] = gregs.a2; -+ regs[REG_INDEX(R13)] = gregs.a3; -+ regs[REG_INDEX(R14)] = gregs.a4; -+ regs[REG_INDEX(R15)] = gregs.a5; -+ regs[REG_INDEX(R16)] = gregs.a6; -+ regs[REG_INDEX(R17)] = gregs.a7; -+ regs[REG_INDEX(R18)] = gregs.s2; -+ regs[REG_INDEX(R19)] = gregs.s3; -+ regs[REG_INDEX(R20)] = gregs.s4; -+ regs[REG_INDEX(R21)] = gregs.s5; -+ regs[REG_INDEX(R22)] = gregs.s6; -+ regs[REG_INDEX(R23)] = gregs.s7; -+ regs[REG_INDEX(R24)] = gregs.s8; -+ regs[REG_INDEX(R25)] = gregs.s9; -+ regs[REG_INDEX(R26)] = gregs.s10; -+ regs[REG_INDEX(R27)] = gregs.s11; -+ regs[REG_INDEX(R28)] = gregs.t3; -+ regs[REG_INDEX(R29)] = gregs.t4; -+ regs[REG_INDEX(R30)] = gregs.t5; -+ regs[REG_INDEX(R31)] = gregs.t6; -+ } +#endif /* riscv64 */ + #if defined(ppc64) || defined(ppc64le) #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e02..9d7fda8a6 100644 +index 8318e8e021..ab092d4ee3 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -43,6 +43,8 @@ #elif defined(arm) #include @@ -58096,41 +54678,11 @@ index 8318e8e02..9d7fda8a6 100644 #endif // This C bool type must be int for compatibility with Linux calls and -diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d85..12eafc455 100644 ---- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -+++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) - #endif - -+// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case -+// like other platforms, so call ptrace with PTRACE_GETREGSET here. -+#ifndef riscv64 - #if defined(_LP64) && defined(PTRACE_GETREGS64) - #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 - #elif defined(PTRACE_GETREGS) -@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #elif defined(PT_GETREGS) - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif -+#endif - - #ifdef PTRACE_GETREGS_REQ - if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -index 0f5f0119c..82c083055 100644 +index 0f5f0119c7..9bff9ee9b1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java -@@ -1,6 +1,7 @@ - /* - * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it -@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; @@ -58138,24 +54690,24 @@ index 0f5f0119c..82c083055 100644 import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; -@@ -592,6 +594,8 @@ public class HotSpotAgent { - machDesc = new MachineDescriptionPPC64(); - } else if (cpu.equals("aarch64")) { - machDesc = new MachineDescriptionAArch64(); +@@ -598,6 +599,8 @@ public class HotSpotAgent { + } else { + machDesc = new MachineDescriptionSPARC32Bit(); + } + } else if (cpu.equals("riscv64")) { + machDesc = new MachineDescriptionRISCV64(); - } else if (cpu.equals("sparc")) { - if (LinuxDebuggerLocal.getAddressSize()==8) { - machDesc = new MachineDescriptionSPARC64Bit(); + } else { + try { + machDesc = (MachineDescription) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java new file mode 100644 -index 000000000..4221937f1 +index 0000000000..a972516dee --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58194,18 +54746,24 @@ index 000000000..4221937f1 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index 5e5a6bb71..acd5844ca 100644 +index 5e5a6bb714..dc0bcb3da9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.x86.*; +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * +@@ -34,12 +34,14 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.riscv64.*; import sun.jvm.hotspot.debugger.ppc64.*; import sun.jvm.hotspot.debugger.linux.x86.*; -@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; import sun.jvm.hotspot.debugger.linux.ppc64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; @@ -58231,7 +54789,7 @@ index 5e5a6bb71..acd5844ca 100644 return context.getTopFrame(dbg); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java new file mode 100644 -index 000000000..eaef586b4 +index 0000000000..f06da24bd0 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java @@ -0,0 +1,90 @@ @@ -58327,7 +54885,7 @@ index 000000000..eaef586b4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java new file mode 100644 -index 000000000..4789e664c +index 0000000000..fdb841ccf3 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58379,39 +54937,9 @@ index 000000000..4789e664c + return debugger.newAddress(getRegister(index)); + } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -index 74e957d94..1f44d75ee 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java -@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; - import sun.jvm.hotspot.debugger.cdbg.*; - import sun.jvm.hotspot.debugger.proc.amd64.*; - import sun.jvm.hotspot.debugger.proc.aarch64.*; -+import sun.jvm.hotspot.debugger.proc.riscv64.*; - import sun.jvm.hotspot.debugger.proc.sparc.*; - import sun.jvm.hotspot.debugger.proc.ppc64.*; - import sun.jvm.hotspot.debugger.proc.x86.*; - import sun.jvm.hotspot.debugger.ppc64.*; - import sun.jvm.hotspot.debugger.amd64.*; - import sun.jvm.hotspot.debugger.aarch64.*; -+import sun.jvm.hotspot.debugger.riscv64.*; - import sun.jvm.hotspot.debugger.sparc.*; - import sun.jvm.hotspot.debugger.x86.*; - import sun.jvm.hotspot.utilities.*; -@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { - threadFactory = new ProcAARCH64ThreadFactory(this); - pcRegIndex = AARCH64ThreadContext.PC; - fpRegIndex = AARCH64ThreadContext.FP; -+ } else if (cpu.equals("riscv64")) { -+ threadFactory = new ProcRISCV64ThreadFactory(this); -+ pcRegIndex = RISCV64ThreadContext.PC; -+ fpRegIndex = RISCV64ThreadContext.FP; - } else if (cpu.equals("ppc64")) { - threadFactory = new ProcPPC64ThreadFactory(this); - pcRegIndex = PPC64ThreadContext.PC; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java new file mode 100644 -index 000000000..c1cf1fb0f +index 0000000000..96d5dee47c --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java @@ -0,0 +1,88 @@ @@ -58505,7 +55033,7 @@ index 000000000..c1cf1fb0f +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java new file mode 100644 -index 000000000..498fa0dc6 +index 0000000000..f2aa845e66 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58559,7 +55087,7 @@ index 000000000..498fa0dc6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java new file mode 100644 -index 000000000..81afd8fdc +index 0000000000..19f64b8ce2 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58611,7 +55139,7 @@ index 000000000..81afd8fdc +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java new file mode 100644 -index 000000000..ab92e3e74 +index 0000000000..aecbda5902 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java @@ -0,0 +1,55 @@ @@ -58672,7 +55200,7 @@ index 000000000..ab92e3e74 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java new file mode 100644 -index 000000000..1e8cd19b2 +index 0000000000..1d3da6be5a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java @@ -0,0 +1,48 @@ @@ -58726,7 +55254,7 @@ index 000000000..1e8cd19b2 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java new file mode 100644 -index 000000000..eecb6e029 +index 0000000000..725b94e25a --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java @@ -0,0 +1,46 @@ @@ -58778,7 +55306,7 @@ index 000000000..eecb6e029 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java new file mode 100644 -index 000000000..426ff0580 +index 0000000000..fb60a70427 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java @@ -0,0 +1,172 @@ @@ -58828,9 +55356,9 @@ index 000000000..426ff0580 + // */ + // struct sigcontext { + // struct user_regs_struct sc_regs; -+ // union __riscv_fp_state sc_fpregs; ++ // union __riscv_fp_state sc_fpregs; + // }; -+ // ++ // + // struct user_regs_struct { + // unsigned long 
pc; + // unsigned long ra; @@ -58955,9 +55483,16 @@ index 000000000..426ff0580 + public abstract Address getRegisterAsAddress(int index); +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java -index 190062785..74bd614d3 100644 +index 190062785a..89d676fe3b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; @@ -58977,7 +55512,7 @@ index 190062785..74bd614d3 100644 access = (JavaThreadPDAccess) diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java new file mode 100644 -index 000000000..2df0837b6 +index 0000000000..5c2b6e0e3e --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ @@ -59115,7 +55650,7 @@ index 000000000..2df0837b6 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java new file mode 100644 -index 000000000..a3bbf1ad1 +index 0000000000..34701c6922 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java @@ -0,0 +1,223 @@ @@ -59344,14 +55879,14 @@ index 000000000..a3bbf1ad1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java new file mode 100644 -index 000000000..c04def5a1 +index 0000000000..e372bc5f7b --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Red Hat Inc. -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * Copyright (c) 2021, 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59735,11 +56270,11 @@ index 000000000..c04def5a1 + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // The return_address is always the word on the stack -+ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of FP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame. 
-+ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ Address savedFPAddr = senderSP.addOffsetTo(-2 * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. @@ -59904,10 +56439,10 @@ index 000000000..c04def5a1 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java new file mode 100644 -index 000000000..4d79e3ee4 +index 0000000000..850758a7ed --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java -@@ -0,0 +1,58 @@ +@@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. @@ -59940,6 +56475,7 @@ index 000000000..4d79e3ee4 +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.utilities.*; + +public class RISCV64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; @@ -59968,7 +56504,7 @@ index 000000000..4d79e3ee4 +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java new file mode 100644 -index 000000000..d7187a5f8 +index 0000000000..4aeb1c6f55 --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java @@ -0,0 +1,53 @@ @@ -60026,9 +56562,16 @@ index 000000000..d7187a5f8 + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107c..948eabcab 100644 +index 7d7a6107ca..6552ce255f 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -54,7 +54,7 @@ public class PlatformInfo { public static boolean knownCPU(String cpu) { @@ -60038,42 +56581,18 @@ index 7d7a6107c..948eabcab 100644 for(String s : KNOWN) { if(s.equals(cpu)) -diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c -index d0a6f4ea8..a29c7bf8b 100644 ---- a/src/utils/hsdis/hsdis.c -+++ b/src/utils/hsdis/hsdis.c -@@ -28,9 +28,6 @@ - */ - - #include /* required by bfd.h */ --#include --#include --#include - - #include - #include -@@ -479,6 +476,9 @@ static const char* native_arch_name() { - #endif - #ifdef LIBARCH_s390x - res = "s390:64-bit"; -+#endif -+#ifdef LIBARCH_riscv64 -+ res = "riscv:rv64"; - #endif - if (res == NULL) - res = "architecture not set in Makefile!"; diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java -index 7805918c2..a21307083 100644 +index 7805918c28..823b9f39db 100644 --- a/test/hotspot/jtreg/compiler/c2/TestBit.java +++ b/test/hotspot/jtreg/compiler/c2/TestBit.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; +@@ -34,7 +34,7 @@ import jdk.test.lib.process.ProcessTools; * * @run driver compiler.c2.TestBit * @@ -60082,7 +56601,7 @@ index 7805918c2..a21307083 100644 * @requires vm.debug == true & vm.compiler2.enabled */ public class TestBit { -@@ -54,7 +55,8 @@ public class TestBit { +@@ -54,7 +54,8 @@ public class TestBit { String expectedTestBitInstruction = "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : "aarch64".equals(System.getProperty("os.arch")) ? "tb" : @@ -60092,26 +56611,112 @@ index 7805918c2..a21307083 100644 if (expectedTestBitInstruction != null) { output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +new file mode 100644 +index 0000000000..5a1b659bbe +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/floatingpoint/TestLibmIntrinsics.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Alibaba Group Holding Limited. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test libm intrinsics ++ * @library /test/lib / ++ * ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI ++ * -XX:-BackgroundCompilation -XX:-UseOnStackReplacement ++ * compiler.floatingpoint.TestLibmIntrinsics ++ */ ++ ++package compiler.floatingpoint; ++ ++import compiler.whitebox.CompilerWhiteBoxTest; ++import sun.hotspot.WhiteBox; ++ ++import java.lang.reflect.Method; ++ ++public class TestLibmIntrinsics { ++ ++ private static final WhiteBox WHITE_BOX = WhiteBox.getWhiteBox(); ++ ++ private static final double pi = 3.1415926; ++ ++ private static final double expected = 2.5355263553695413; ++ ++ static double m() { ++ return Math.pow(pi, Math.sin(Math.cos(Math.tan(Math.log(Math.log10(Math.exp(pi))))))); ++ } ++ ++ static public void main(String[] args) throws NoSuchMethodException { ++ Method test_method = compiler.floatingpoint.TestLibmIntrinsics.class.getDeclaredMethod("m"); ++ ++ double interpreter_result = m(); ++ ++ // Compile with C1 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); ++ ++ double c1_result = m(); ++ ++ WHITE_BOX.deoptimizeMethod(test_method); ++ ++ // Compile it with C2 if possible ++ WHITE_BOX.enqueueMethodForCompilation(test_method, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); ++ ++ double c2_result = m(); ++ ++ if (interpreter_result != c1_result || ++ interpreter_result != c2_result || ++ c1_result != c2_result) { ++ System.out.println("interpreter = " + interpreter_result + " c1 = " + c1_result + " c2 = " + c2_result); ++ throw new RuntimeException("Test Failed"); ++ } ++ } ++} diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -index 558b4218f..9d875e33f 100644 +index 558b4218f0..55374b116e 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), @@ -60121,25 +56726,25 @@ index 558b4218f..9d875e33f 100644 SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -index 3ed72bf0a..a7e277060 100644 +index 3ed72bf0a9..8fb82ee453 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), @@ -60149,25 +56754,25 @@ index 3ed72bf0a..a7e277060 100644 SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -index c05cf309d..e714fcc59 100644 +index c05cf309da..aca32137ed 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; - +@@ -42,6 +42,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; -@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + +@@ -54,6 +55,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), @@ -60177,25 +56782,25 @@ index c05cf309d..e714fcc59 100644 SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -index 58ce5366b..d52d81e26 100644 +index 58ce5366ba..8deac4f789 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; - +@@ -41,6 +41,7 @@ package compiler.intrinsics.sha.cli; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; -+import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; -@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + +@@ -53,6 +54,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForUnsupportedAArch64CPU( SHAOptionsBase.USE_SHA_OPTION), @@ -60205,17 +56810,17 @@ index 58ce5366b..d52d81e26 100644 SHAOptionsBase.USE_SHA_OPTION), new GenericTestCaseForOtherCPU( diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -index faa9fdbae..50e549069 100644 +index faa9fdbae6..2663500204 100644 --- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; +@@ -32,26 +32,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; /** * Generic test case for SHA-related options targeted to any CPU except @@ -60243,19 +56848,19 @@ index faa9fdbae..50e549069 100644 String shouldPassMessage = String.format("JVM should start with " + "option '%s' without any warnings", optionName); - // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of -+ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of ++ // Verify that on non-x86, non-RISCV64, non-SPARC and non-AArch64 CPU usage of // SHA-related options will not cause any warnings. CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ".*" + optionName + ".*" }, shouldPassMessage, diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java new file mode 100644 -index 000000000..d81b5b53f +index 0000000000..8566d57c39 --- /dev/null +++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java -@@ -0,0 +1,102 @@ +@@ -0,0 +1,115 @@ +/* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -60292,10 +56897,19 @@ index 000000000..d81b5b53f + */ +public class GenericTestCaseForUnsupportedRISCV64CPU extends + SHAOptionsBase.TestCase { ++ ++ final private boolean checkUseSHA; ++ + public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ this(optionName, true); ++ } ++ ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName, boolean checkUseSHA) { + super(optionName, new AndPredicate(Platform::isRISCV64, + new NotPredicate(SHAOptionsBase.getPredicateForOption( + optionName)))); ++ ++ this.checkUseSHA = checkUseSHA; + } + + @Override @@ -60309,22 +56923,24 @@ index 000000000..d81b5b53f + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, + CommandLineOptionTest.prepareBooleanFlag(optionName, false)); + -+ shouldPassMessage = String.format("If JVM is started with '-XX:-" -+ + "%s' '-XX:+%s', output should contain warning.", -+ SHAOptionsBase.USE_SHA_OPTION, optionName); -+ -+ // Verify that when the tested option is enabled, then -+ // a warning will occur in VM output if UseSHA is disabled. -+ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { -+ CommandLineOptionTest.verifySameJVMStartup( -+ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, -+ null, -+ shouldPassMessage, -+ shouldPassMessage, -+ ExitCode.OK, -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ if (checkUseSHA) { ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. ++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } + } + } + @@ -60336,188 +56952,38 @@ index 000000000..d81b5b53f + optionName), + SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); + -+ // Verify that option is disabled even if it was explicitly enabled -+ // using CLI options. -+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if set to true directly", optionName), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); -+ -+ // Verify that option is disabled when +UseSHA was passed to JVM. 
-+ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", -+ String.format("Option '%s' should be off on unsupported " -+ + "RISCV64CPU even if %s flag set to JVM", -+ optionName, CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)), -+ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, -+ CommandLineOptionTest.prepareBooleanFlag( -+ SHAOptionsBase.USE_SHA_OPTION, true)); -+ } -+} -diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -new file mode 100644 -index 000000000..d3aafec8e ---- /dev/null -+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java -@@ -0,0 +1,153 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+ -+/* -+ * @test -+ * @bug 8173585 -+ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that -+ * differing code paths are taken contingent upon the length of the input String. -+ * Hence we must test against differing string lengths in order to validate -+ * correct functionality. We also ensure the strings are long enough to trigger -+ * the looping conditions of the individual code paths. 
-+ * -+ * Run with varing levels of AVX and SSE support, also without the intrinsic at all -+ * -+ * @library /compiler/patches /test/lib -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar -+ */ -+ -+package compiler.intrinsics.string; -+ -+import jdk.test.lib.Asserts; -+ -+public class TestStringLatin1IndexOfChar{ -+ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions -+ -+ public static void main(String[] args) throws Exception { -+ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... -+ findOneItem(); -+ withOffsetTest(); -+ testEmpty(); -+ } -+ } -+ -+ private static void testEmpty(){ -+ Asserts.assertEQ("".indexOf('a'), -1); -+ } -+ -+ private final static char SEARCH_CHAR = 'z'; -+ private final static char INVERLEAVING_CHAR = 'a'; -+ private final static char MISSING_CHAR = 'd'; -+ -+ private static void findOneItem(){ -+ //test strings of varying length ensuring that for all lengths one instance of the -+ //search char can be found. We check what happens when the search character is in -+ //each position of the search string (including first and last positions) -+ for(int strLength : new int[]{1, 15, 31, 32, 79}){ -+ for(int searchPos = 0; searchPos < strLength; searchPos++){ -+ String totest = makeOneItemStringLatin1(strLength, searchPos); -+ -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ Asserts.assertEQ(intri, nonintri); -+ } -+ } -+ } -+ -+ private static String makeOneItemStringLatin1(int length, int searchPos){ -+ StringBuilder sb = new StringBuilder(length); -+ -+ for(int n =0; n < length; n++){ -+ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); -+ } -+ -+ return sb.toString(); -+ } -+ -+ private static void withOffsetTest(){ -+ //progressivly move through string checking indexes and starting offset correctly processed -+ //string is of form azaza, aazaazaa, aaazaaazaaa, etc -+ //we find n s.t. 
maxlength = (n*3) + 2 -+ int maxaInstances = (MAX_LENGTH-2)/3; -+ -+ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ -+ String totest = makeWithOffsetStringLatin1(aInstances); -+ -+ int startoffset; -+ { -+ int intri = totest.indexOf(SEARCH_CHAR); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ { -+ int intri = totest.indexOf(SEARCH_CHAR, startoffset); -+ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); -+ -+ Asserts.assertEQ(intri, nonintri); -+ startoffset = intri+1; -+ } -+ -+ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string -+ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); -+ } -+ } -+ -+ private static String makeWithOffsetStringLatin1(int aInstances){ -+ StringBuilder sb = new StringBuilder((aInstances*3) + 2); -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ -+ sb.append(SEARCH_CHAR); -+ -+ for(int n =0; n < aInstances; n++){ -+ sb.append(INVERLEAVING_CHAR); -+ } -+ return sb.toString(); -+ } ++ if (checkUseSHA) { ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); + -+ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { -+ //non intrinsic version of indexOfChar -+ byte c = (byte)ch; -+ for (int i = fromIndex; i < value.length(); i++) { -+ if (value.charAt(i) == c) { -+ return i; -+ } ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); + } -+ return -1; + } +} diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java -index 2e3e2717a..8093d6598 100644 +index 2e3e2717a6..7be8af6d03 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60528,9 +56994,16 @@ index 2e3e2717a..8093d6598 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java -index 0e06a9e43..1ff9f36e1 100644 +index 0e06a9e432..797927b42b 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60541,9 +57014,16 @@ index 0e06a9e43..1ff9f36e1 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java -index c3cdbf374..f3531ea74 100644 +index c3cdbf3746..be8f7d586c 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60554,9 +57034,16 @@ index c3cdbf374..f3531ea74 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java -index d33bd411f..589209447 100644 +index d33bd411f1..d96d5e29c0 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60567,9 +57054,16 @@ index d33bd411f..589209447 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java -index 992fa4b51..907e21371 100644 +index 992fa4b516..b09c873d05 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60580,9 +57074,16 @@ index 992fa4b51..907e21371 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java -index 3e79b3528..c41c0b606 100644 +index 3e79b3528b..fe40ed6f98 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8138583 @@ -60593,9 +57094,16 @@ index 3e79b3528..c41c0b606 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java -index 6603dd224..b626da40d 100644 +index 6603dd224e..5163191049 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8135028 @@ -60606,9 +57114,16 @@ index 6603dd224..b626da40d 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java -index d9a0c9880..92cd84a2f 100644 +index d9a0c98800..d999ae423c 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60619,9 +57134,16 @@ index d9a0c9880..92cd84a2f 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java -index 722db95ae..e72345799 100644 +index 722db95aed..65912a5c7f 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60632,9 +57154,16 @@ index 722db95ae..e72345799 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java -index f58f21feb..f4f67cf52 100644 +index f58f21feb2..fffdc2f756 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -25,7 +25,7 @@ * @test * @bug 8074981 @@ -60645,7 +57174,7 @@ index f58f21feb..f4f67cf52 100644 * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 * -XX:CompileThresholdScaling=0.1 diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java -index acb86812d..c5e38ba72 100644 +index acb86812d2..2c866f26f0 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -24,7 +24,7 @@ @@ -60653,12 +57182,12 @@ index acb86812d..c5e38ba72 100644 /* @test * @bug 8167409 - * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs */ package compiler.runtime.criticalnatives.argumentcorruption; diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java -index eab36f931..4437367b6 100644 +index eab36f9311..1da369fde2 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java @@ -24,7 +24,7 @@ @@ -60666,14 +57195,21 @@ index eab36f931..4437367b6 100644 /* @test * @bug 8167408 - * @requires (os.arch != "aarch64") & (os.arch != "arm") -+ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") ++ * @requires (os.arch != "aarch64") & (os.arch != "riscv64") & (os.arch != "arm") * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp */ package compiler.runtime.criticalnatives.lookup; diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java -index 7774dabcb..284b51019 100644 +index 7774dabcb5..7afe3560f3 100644 --- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ 
b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2014, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -61,15 +61,17 @@ public class IntrinsicPredicates { public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE @@ -60716,17 +57252,17 @@ index 7774dabcb..284b51019 100644 public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -index 57256aa5a..16c199e37 100644 +index 57256aa5a3..d4d43b01ae 100644 --- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { +@@ -112,7 +112,7 @@ public class CheckForProperDetailStackTrace { // It's ok for ARM not to have symbols, because it does not support NMT detail // when targeting thumb2. It's also ok for Windows not to have symbols, because // they are only available if the symbols file is included with the build. @@ -60736,17 +57272,17 @@ index 57256aa5a..16c199e37 100644 } output.reportDiagnosticSummary(); diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 127bb6abc..46be4dc98 100644 +index 127bb6abcd..eab19273ad 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it -@@ -239,7 +240,7 @@ public class ReservedStackTest { +@@ -239,7 +239,7 @@ public class ReservedStackTest { return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || @@ -60755,95 +57291,46 @@ index 127bb6abc..46be4dc98 100644 Platform.isOSX() || Platform.isSolaris(); } -diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh -index 0c300d4fd..7f3698c47 100644 ---- a/test/hotspot/jtreg/test_env.sh -+++ b/test/hotspot/jtreg/test_env.sh -@@ -185,6 +185,11 @@ if [ $? = 0 ] - then - VM_CPU="arm" - fi -+grep "riscv64" vm_version.out > ${NULL} -+if [ $? 
= 0 ] -+then -+ VM_CPU="riscv64" -+fi - grep "ppc" vm_version.out > ${NULL} - if [ $? = 0 ] - then diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b..73e92855d 100644 +index 126a43a900..feb4de5388 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -45,7 +46,7 @@ import java.util.Set; +@@ -45,7 +45,7 @@ import java.util.Set; */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), -+ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isRISCV64", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -index cb3348a0f..bc0d1a743 100644 ---- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -+++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java -@@ -63,13 +63,13 @@ public class thrinfo001 { - try { - t_a.join(); - } catch (InterruptedException e) {} -+ checkInfo(t_a, t_a.getThreadGroup(), 1); - - thrinfo001b t_b = new thrinfo001b(); - t_b.setPriority(Thread.MIN_PRIORITY); - t_b.setDaemon(true); - checkInfo(t_b, t_b.getThreadGroup(), 2); - t_b.start(); -- checkInfo(t_b, t_b.getThreadGroup(), 2); - try { - t_b.join(); - } catch (InterruptedException e) {} diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -index 7990c49a1..bb8c79cdd 100644 +index 7990c49a1f..abeff80e5e 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java -@@ -1,5 +1,6 @@ +@@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. +- * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it -@@ -54,8 +55,8 @@ public class TestCPUInformation { +@@ -54,8 +54,8 @@ public class TestCPUInformation { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); -+ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); -+ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "RISCV64", "s390"); } } } diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c..a9cd63db9 100644 +index 6269373c2b..e1511772e7 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it -@@ -202,6 +203,10 @@ public class Platform { +@@ -205,6 +205,10 @@ public class Platform { return isArch("arm.*"); } @@ -60854,233 +57341,3 @@ index f4ee0546c..a9cd63db9 100644 public static boolean isPPC() { return isArch("ppc.*"); } -diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -new file mode 100644 -index 000000000..6852c0540 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java -@@ -0,0 +1,221 @@ -+/* -+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ */ -+package org.openjdk.bench.java.lang; -+ -+import java.util.Random; -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.State; -+ -+import java.util.concurrent.TimeUnit; -+ -+/** -+ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of -+ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. -+ * On x86 the behaviour of the indexOf method is contingent upon the length of the string -+ */ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@State(Scope.Thread) -+public class IndexOfBenchmark { -+ private static final int loops = 100000; -+ private static final Random rng = new Random(1999); -+ private static final int pathCnt = 1000; -+ private static final String [] latn1_short = new String[pathCnt]; -+ private static final String [] latn1_sse4 = new String[pathCnt]; -+ private static final String [] latn1_avx2 = new String[pathCnt]; -+ private static final String [] latn1_mixedLength = new String[pathCnt]; -+ private static final String [] utf16_short = new String[pathCnt]; -+ private static final String [] utf16_sse4 = new String[pathCnt]; -+ private static final String [] utf16_avx2 = new String[pathCnt]; -+ private static final String [] utf16_mixedLength = new String[pathCnt]; -+ static { -+ for (int i = 0; i < pathCnt; i++) { -+ latn1_short[i] = makeRndString(false, 15); -+ latn1_sse4[i] = makeRndString(false, 16); -+ latn1_avx2[i] = makeRndString(false, 32); -+ utf16_short[i] = makeRndString(true, 7); -+ utf16_sse4[i] = makeRndString(true, 8); -+ utf16_avx2[i] = makeRndString(true, 16); -+ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); -+ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); -+ } -+ } -+ -+ private static String makeRndString(boolean isUtf16, int length) { -+ StringBuilder sb = new StringBuilder(length); -+ if(length > 0){ -+ sb.append(isUtf16?'☺':'b'); -+ -+ for (int i = 1; i < length-1; i++) { -+ sb.append((char)('b' + rng.nextInt(26))); -+ } -+ -+ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string -+ } -+ return sb.toString(); -+ } -+ -+ -+ @Benchmark -+ public static void latin1_mixed_char() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_char() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_mixed_String() { -+ int ret = 0; -+ for (String what : latn1_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ @Benchmark -+ public static void utf16_mixed_String() { -+ int ret = 0; -+ for (String what : utf16_mixedLength) { -+ ret += what.indexOf("a"); -+ } -+ } -+ -+ ////////// more detailed code path dependent tests ////////// -+ -+ @Benchmark -+ public static void latin1_Short_char() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_SSE4_char() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark -+ public static void latin1_AVX2_char() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf('a'); -+ } -+ } -+ -+ @Benchmark 
-+ public static int utf16_Short_char() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_char() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_char() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf('a'); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_Short_String() { -+ int ret = 0; -+ for (String what : latn1_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_SSE4_String() { -+ int ret = 0; -+ for (String what : latn1_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int latin1_AVX2_String() { -+ int ret = 0; -+ for (String what : latn1_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_Short_String() { -+ int ret = 0; -+ for (String what : utf16_short) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_SSE4_String() { -+ int ret = 0; -+ for (String what : utf16_sse4) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+ -+ @Benchmark -+ public static int utf16_AVX2_String() { -+ int ret = 0; -+ for (String what : utf16_avx2) { -+ ret += what.indexOf("a"); -+ } -+ return ret; -+ } -+} --- -2.40.0.windows.1 - diff --git a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch index e395c0b7210708ddc2baa7dbc7fb96600f5e8cf5..8d4548aad36df00f937ee2babb039206bb059a35 100755 --- a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +++ b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch @@ -367,4 +367,4 @@ index 000000000..85b49171c --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ -+11.0.22.0.13 ++11.0.24.0.13 diff --git a/LoongArch64-support.patch b/LoongArch64-support.patch index e26615bbde0e0da8cb84cd049b290cd8b63f1919..9c2bb8354556a1dcd1cbf01de61e1125db177f12 100644 --- a/LoongArch64-support.patch +++ b/LoongArch64-support.patch @@ -27,7 +27,7 @@ index 46fb9b4219..c6d8b24fc4 100644 org.graalvm.compiler.asm.sparc.test \ org.graalvm.compiler.asm.test \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index a3e1e00b2c..22f479120b 100644 +index 9bb34363e5..c95a2447ef 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" @@ -43,7 +43,7 @@ index a3e1e00b2c..22f479120b 100644 ############################################################################### # Check if the specified JVM variant should be built. To be used in shell if # constructs, like this: -@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -340,6 +346,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], HOTSPOT_TARGET_CPU_ARCH=arm fi @@ -70,7 +70,7 @@ index a3e1e00b2c..22f479120b 100644 # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) -@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -424,10 +450,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], JVM_FEATURES_jvmci="" INCLUDE_JVMCI="false" else @@ -85,7 +85,7 @@ index a3e1e00b2c..22f479120b 100644 JVM_FEATURES_jvmci="jvmci" INCLUDE_JVMCI="true" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index f89b22f5fc..a7aa4f4aaa 100644 +index 5d1d9efa39..815180ea96 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -23,6 +23,12 @@ @@ -141,10 +141,10 @@ index fdd2c0ca3d..318191233a 100644 assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -index cebc1e410d..816226c068 100644 +index 18b3ea147d..f3398e191c 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -262,18 +262,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } @@ -1293,10 +1293,10 @@ index 0000000000..e6e62cccad +} diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp new file mode 100644 -index 0000000000..7d9d24f2cd +index 0000000000..179da7bd0e --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp -@@ -0,0 +1,2811 @@ +@@ -0,0 +1,2827 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -2613,6 +2613,15 @@ index 0000000000..7d9d24f2cd + static int high6 (int x) { return high(x, 6); } + + ++ static ALWAYSINLINE void patch(address a, int length, uint32_t val) { ++ guarantee(val < (1ULL << length), "Field too big for insn"); ++ guarantee(length > 0, "length > 0"); ++ unsigned target = *(unsigned *)a; ++ target = (target >> length) << length; ++ target |= val; ++ *(unsigned *)a = target; ++ } ++ + protected: + // help methods for instruction ejection + @@ -3326,18 +3335,25 @@ index 0000000000..7d9d24f2cd + void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } + void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } + -+ // Now Membar_mask_bits is 0,Need to fix it after LA6000 + typedef enum { -+ StoreStore = 0, -+ LoadStore = 0, -+ StoreLoad = 0, -+ LoadLoad = 0, -+ AnyAny = 0 ++ // hint[4] ++ Completion = 0, ++ Ordering = (1 << 4), ++ ++ // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
++ // hint[3:2] and hint[1:0] ++ LoadLoad = ((1 << 3) | (1 << 1)), ++ LoadStore = ((1 << 3) | (1 << 0)), ++ StoreLoad = ((1 << 2) | (1 << 1)), ++ StoreStore = ((1 << 2) | (1 << 0)), ++ AnyAny = ((3 << 2) | (3 << 0)), + } Membar_mask_bits; + + // Serializes memory and blows flags + void membar(Membar_mask_bits hint) { -+ dbar(hint); ++ assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); ++ assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); ++ dbar(Ordering | (~hint & 0xf)); + } + + // LSX and LASX @@ -8729,13 +8745,13 @@ index 0000000000..c989e25c3a +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..72a80f37c4 +index 0000000000..6cb77f3fbe --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp -@@ -0,0 +1,1396 @@ +@@ -0,0 +1,1398 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -8891,8 +8907,10 @@ index 0000000000..72a80f37c4 + if (index->is_register()) { + // apply the shift and accumulate the displacement + if (shift > 0) { -+ LIR_Opr tmp = new_pointer_register(); -+ __ shift_left(index, shift, tmp); ++ // Use long register to avoid overflow when shifting large index values left. ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ __ shift_left(tmp, shift, tmp); + index = tmp; + } + if (large_disp != 0) { @@ -12553,7 +12571,7 @@ index 0000000000..04359bc172 +#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp new file mode 100644 -index 0000000000..9b4f3b88d4 +index 0000000000..6f6d34e026 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp @@ -0,0 +1,690 @@ @@ -13102,7 +13120,7 @@ index 0000000000..9b4f3b88d4 + + // first the method + -+ Method* m = *interpreter_frame_method_addr(); ++ Method* m = safe_interpreter_frame_method(); + + // validate the method we'd find in this potential sender + if (!Method::is_valid_method(m)) return false; @@ -14645,13 +14663,13 @@ index 0000000000..a7ebbfaabb +#endif // CPU_LOONGARCH_GC_SHARED_BARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp new file mode 100644 -index 0000000000..749d3a3f79 +index 0000000000..d09e9a75a7 --- /dev/null +++ b/src/hotspot/cpu/loongarch/gc/shared/cardTableBarrierSetAssembler_loongarch.cpp -@@ -0,0 +1,142 @@ +@@ -0,0 +1,140 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -14710,7 +14728,7 @@ index 0000000000..749d3a3f79 + + __ beq(count, R0, L_done); // zero count - nothing to do + -+ if (UseConcMarkSweepGC) __ membar(__ StoreStore); ++ if (ct->scanned_concurrently()) __ membar(__ StoreStore); + + __ li(tmp, disp); + @@ -14753,8 +14771,6 @@ index 0000000000..749d3a3f79 + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { -+ Untested("Untested"); -+ __ warn("store_check Untested"); + Label L_already_dirty; + __ membar(__ StoreLoad); + __ ld_b(AT, tmp, 0); @@ -14764,7 +14780,7 @@ index 0000000000..749d3a3f79 + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { -+ __ membar(Assembler::StoreLoad); ++ __ membar(Assembler::StoreStore); + } + __ st_b(R0, tmp, 0); + } @@ -18663,13 +18679,13 @@ index 0000000000..80dff0c762 + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 -index 0000000000..a5fb5f7b85 +index 0000000000..cc3824a402 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad -@@ -0,0 +1,13906 @@ +@@ -0,0 +1,13917 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++// Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it @@ -28730,14 +28746,25 @@ index 0000000000..a5fb5f7b85 +%} + +// Store CMS card-mark Immediate 0 ++instruct storeImmCM_order(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(UseConcMarkSweepGC && !UseCondCardMark); ++ ins_cost(100); ++ format %{ "StoreCM MEMBAR storestore\n\t" ++ "st_b $mem, zero\t! card-mark imm0" %} ++ ins_encode %{ ++ __ membar(__ StoreStore); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); -+ format %{ "StoreCM MEMBAR loadstore\n\t" -+ "st_b $mem, zero\t! CMS card-mark imm0" %} ++ format %{ "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ -+ __ membar(__ StoreStore); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); @@ -32575,13 +32602,13 @@ index 0000000000..a5fb5f7b85 + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp new file mode 100644 -index 0000000000..5d0c8c45fb +index 0000000000..9720fd176d --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp -@@ -0,0 +1,4563 @@ +@@ -0,0 +1,4567 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2017, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -34407,7 +34434,7 @@ index 0000000000..5d0c8c45fb + + bind(fail); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); @@ -34430,7 +34457,7 @@ index 0000000000..5d0c8c45fb + + bind(neq); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) @@ -34455,7 +34482,7 @@ index 0000000000..5d0c8c45fb + + bind(fail); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); @@ -34480,7 +34507,7 @@ index 0000000000..5d0c8c45fb + + bind(neq); + if (barrier) -+ membar(LoadLoad); ++ dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) @@ -34858,7 +34885,7 @@ index 0000000000..5d0c8c45fb + move(AT, R0); + bnez(scrReg, DONE_SET); + -+ membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); ++ membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); + st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + li(resReg, 1); + b(DONE); @@ -37063,10 +37090,14 @@ index 0000000000..5d0c8c45fb + address last = code()->last_insn(); + if (last != NULL && ((NativeInstruction*)last)->is_sync() && prev == last) { + code()->set_last_insn(NULL); ++ NativeMembar *membar = (NativeMembar*)prev; ++ // merged membar ++ // e.g. LoadLoad and LoadLoad|LoadStore to LoadLoad|LoadStore ++ membar->set_hint(membar->get_hint() & (~hint & 0xF)); + block_comment("merged membar"); + } else { + code()->set_last_insn(pc()); -+ dbar(hint); ++ Assembler::membar(hint); + } +} + @@ -38015,13 +38046,13 @@ index 0000000000..49302590c3 +#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp new file mode 100644 -index 0000000000..3ed4c36651 +index 0000000000..6e27a69747 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp -@@ -0,0 +1,1625 @@ +@@ -0,0 +1,1626 @@ +/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) -+ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2022, 2024, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -38920,7 +38951,7 @@ index 0000000000..3ed4c36651 + b(Q_DONE); + bind(JX_IS_0); + if (UseLASX) { -+ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ xvfmul_d(v28, v18, v6); // f[0,3] * x[0] + fmul_d(v30, v19, v6); // f[4] * x[0] + } else { + vfmul_d(v28, v18, v6); // f[0,1] * x[0] @@ -39149,6 +39180,7 @@ index 0000000000..3ed4c36651 + st_w(tmp2, SCR2, 0); + addi_w(SCR1, SCR1, 24); + addi_w(jz, jz, 1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); + st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw + b(Z_ZERO_CHECK_DONE); + bind(Z_IS_LESS_THAN_TWO24B); @@ -40801,10 +40833,10 @@ index 0000000000..9234befae3 +} diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp new file mode 100644 -index 0000000000..195a2df580 +index 0000000000..a6e9d4dd3c --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp -@@ -0,0 +1,521 @@ +@@ -0,0 +1,528 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -41325,6 +41357,13 @@ index 0000000000..195a2df580 + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ unsigned int get_hint() { return Assembler::low(insn_word(), 4); } ++ void set_hint(int hint) { Assembler::patch(addr_at(0), 4, hint); } ++}; ++ +#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp new file mode 100644 @@ -42441,7 +42480,7 @@ index 0000000000..334c783b37 +} diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp new file mode 100644 -index 0000000000..736ed0a85f +index 0000000000..bc91ee005e --- /dev/null +++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp @@ -0,0 +1,3621 @@ @@ -42760,9 +42799,9 @@ index 0000000000..736ed0a85f +} + +// Is vector's size (in bytes) bigger than a size saved by default? -+// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++// 8 bytes registers are saved by default using fld/fst instructions. +bool SharedRuntime::is_wide_vector(int size) { -+ return size > 16; ++ return size > 8; +} + +size_t SharedRuntime::trampoline_size() { @@ -46068,7 +46107,7 @@ index 0000000000..736ed0a85f +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..0549c3c58f +index 0000000000..7f73863b2e --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp @@ -0,0 +1,4804 @@ @@ -46781,8 +46820,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy + void generate_disjoint_large_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + @@ -46862,8 +46901,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy lsx + void generate_disjoint_large_copy_lsx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + @@ -46944,8 +46983,8 @@ index 0000000000..0549c3c58f + + // disjoint large copy lasx + void generate_disjoint_large_copy_lasx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + @@ -47026,8 +47065,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy + void generate_conjoint_large_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le32, le16, le8, lt8; + @@ -47104,8 +47143,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy lsx + void generate_conjoint_large_copy_lsx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le64, le32, le16, lt16; + @@ -47183,8 +47222,8 @@ index 0000000000..0549c3c58f + + // conjoint large copy 
lasx + void generate_conjoint_large_copy_lasx(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label loop, le128, le64, le32, lt32; + @@ -47262,8 +47301,8 @@ index 0000000000..0549c3c58f + + // Byte small copy: less than { int:9, lsx:17, lasx:33 } elements. + void generate_byte_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -47628,8 +47667,8 @@ index 0000000000..0549c3c58f + // + address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) @@ -47668,8 +47707,8 @@ index 0000000000..0549c3c58f + // + address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); @@ -47695,8 +47734,8 @@ index 0000000000..0549c3c58f + + // Short small copy: less than { int:9, lsx:9, lasx:17 } elements. + void generate_short_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -47907,8 +47946,8 @@ index 0000000000..0549c3c58f + // + address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char * name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (UseLASX) @@ -47947,8 +47986,8 @@ index 0000000000..0549c3c58f + // + address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, + Label &large_aligned, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); @@ -47974,8 +48013,8 @@ index 0000000000..0549c3c58f + + // Int small copy: less than { int:7, lsx:7, lasx:9 } elements. 
+ void generate_int_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -48211,8 +48250,8 @@ index 0000000000..0549c3c58f + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, @@ -48239,8 +48278,8 @@ index 0000000000..0549c3c58f + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { @@ -48257,8 +48296,8 @@ index 0000000000..0549c3c58f + + // Long small copy: less than { int:4, lsx:4, lasx:5 } elements. + void generate_long_small_copy(Label &entry, const char *name) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + + Label L; + __ bind(entry); @@ -48361,8 +48400,8 @@ index 0000000000..0549c3c58f + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, true, aligned, small, large, large_aligned, @@ -48389,8 +48428,8 @@ index 0000000000..0549c3c58f + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, Label &large_aligned, const char *name, + int small_limit, bool dest_uninitialized = false) { -+ StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + if (is_oop) { @@ -53459,10 +53498,10 @@ index 0000000000..ddb38faf44 +#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp new file mode 100644 -index 0000000000..8ad7c5f76e +index 0000000000..673032218f --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp -@@ -0,0 +1,4147 @@ +@@ -0,0 +1,4113 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -55707,38 +55746,6 @@ index 0000000000..8ad7c5f76e + __ jr(T4); +} + -+// ---------------------------------------------------------------------------- -+// Volatile variables demand their effects be made known to all CPU's -+// in order. Store buffers on most chips allow reads & writes to -+// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode -+// without some kind of memory barrier (i.e., it's not sufficient that -+// the interpreter does not reorder volatile references, the hardware -+// also must not reorder them). 
-+// -+// According to the new Java Memory Model (JMM): -+// (1) All volatiles are serialized wrt to each other. ALSO reads & -+// writes act as aquire & release, so: -+// (2) A read cannot let unrelated NON-volatile memory refs that -+// happen after the read float up to before the read. It's OK for -+// non-volatile memory refs that happen before the volatile read to -+// float down below it. -+// (3) Similar a volatile write cannot let unrelated NON-volatile -+// memory refs that happen BEFORE the write float down to after the -+// write. It's OK for non-volatile memory refs that happen after the -+// volatile write to float up before it. -+// -+// We only put in barriers around volatile refs (they are expensive), -+// not _between_ memory refs (that would require us to track the -+// flavor of the previous memory refs). Requirements (2) and (3) -+// require some barriers before volatile stores and after volatile -+// loads. These nearly cover requirement (1) but miss the -+// volatile-store-volatile-load case. This final case is placed after -+// volatile-stores although it could just as well go before -+// volatile-loads. -+void TemplateTable::volatile_barrier() { -+ if(os::is_MP()) __ membar(__ StoreLoad); -+} -+ +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in @@ -55934,7 +55941,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56080,7 +56087,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -56196,7 +56203,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + @@ -56368,7 +56375,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} @@ -56477,7 +56484,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + @@ -56526,7 +56533,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} @@ -56577,7 +56584,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56621,7 +56628,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -56651,7 +56658,7 @@ index 0000000000..8ad7c5f76e + + Label notVolatile; + __ beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + @@ -56676,7 +56683,7 @@ index 0000000000..8ad7c5f76e + { + Label notVolatile; + __ 
beq(scratch, R0, notVolatile); -+ volatile_barrier(); ++ __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} @@ -57008,7 +57015,6 @@ index 0000000000..8ad7c5f76e + + __ bind(no_such_method); + // throw exception -+ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. @@ -57022,7 +57028,6 @@ index 0000000000..8ad7c5f76e + + __ bind(no_such_interface); + // throw exception -+ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) + __ restore_bcp(); + __ restore_locals(); + // Pass arguments for generating a verbose error message. @@ -57830,7 +57835,7 @@ index 0000000000..1a93123134 +#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp new file mode 100644 -index 0000000000..0a9b55d17e +index 0000000000..9115135166 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp @@ -0,0 +1,397 @@ @@ -57995,7 +58000,7 @@ index 0000000000..0a9b55d17e + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { -+ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 150); + } + + if (supports_lsx()) { @@ -64386,13 +64391,13 @@ index 0000000000..b97ecbcca5 +#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp new file mode 100644 -index 0000000000..cb1d53db0a +index 0000000000..f33165334c --- /dev/null +++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp -@@ -0,0 +1,149 @@ +@@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -64451,7 +64456,7 @@ index 0000000000..cb1d53db0a + __ beq(count, R0, L_done); // zero count - nothing to do + __ delayed()->nop(); + -+ if (UseConcMarkSweepGC) __ sync(); ++ if (ct->scanned_concurrently()) __ membar(Assembler::StoreStore); + + __ set64(tmp, disp); + @@ -64500,8 +64505,6 @@ index 0000000000..cb1d53db0a + + jbyte dirty = CardTable::dirty_card_val(); + if (UseCondCardMark) { -+ Untested("Untested"); -+ __ warn("store_check Untested"); + Label L_already_dirty; + __ membar(Assembler::StoreLoad); + __ lb(AT, tmp, 0); @@ -64512,7 +64515,7 @@ index 0000000000..cb1d53db0a + __ bind(L_already_dirty); + } else { + if (ct->scanned_concurrently()) { -+ __ membar(Assembler::StoreLoad); ++ __ membar(Assembler::StoreStore); + } + __ sb(R0, tmp, 0); + } @@ -104399,7 +104402,7 @@ index 0000000000..75c23e8088 + return icache_line_size; +} diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2..f570946090 100644 +index 243cde8d74..124efbfb1b 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { @@ -104424,10 +104427,10 @@ index 847f7d61d2..f570946090 100644 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -index d34ea45c0b..f6b6dbdee3 100644 +index 8bb8c441b2..32384c6491 100644 --- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -275,21 +275,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } @@ -104518,10 +104521,10 @@ index 897be2209e..0c27cc20f3 100644 CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -index ae297ac635..c786803e0f 100644 +index 86eb2fe88c..114aacaade 100644 --- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -215,16 +215,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } @@ -104607,10 +104610,10 @@ index e503159eb7..2e5609fec8 100644 void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "unused on this code path"); diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -index a09a159722..a02ffafc77 100644 +index b324a3dbd8..0b7cb52dcd 100644 --- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -269,19 +269,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ move(temp, addr); } @@ -104790,7 +104793,7 @@ index 3687754e71..791e4ed43f 100644 void generate_c1_load_barrier_stub(LIR_Assembler* ce, 
ZLoadBarrierStubC1* stub) const; diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 74945999e7..2b8ac3dd2a 100644 +index 2842a11f92..4f58ec4be3 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -23,6 +23,12 @@ @@ -104806,7 +104809,7 @@ index 74945999e7..2b8ac3dd2a 100644 // no precompiled headers #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -3966,6 +3972,8 @@ size_t os::Linux::find_large_page_size() { +@@ -4060,6 +4066,8 @@ size_t os::Linux::find_large_page_size() { IA64_ONLY(256 * M) PPC_ONLY(4 * M) S390_ONLY(1 * M) @@ -104847,13 +104850,13 @@ index 0000000000..30719a0340 + */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp new file mode 100644 -index 0000000000..86f8c963f5 +index 0000000000..8403e7838a --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -104972,7 +104975,7 @@ index 0000000000..86f8c963f5 + " sc.w %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) @@ -105000,7 +105003,7 @@ index 0000000000..86f8c963f5 + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) @@ -105267,7 +105270,7 @@ index 0000000000..ebd73af0c5 + diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp new file mode 100644 -index 0000000000..295d20e19e +index 0000000000..5429a1055a --- /dev/null +++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp @@ -0,0 +1,51 @@ @@ -105304,19 +105307,19 @@ index 0000000000..295d20e19e +// Included in orderAccess.hpp header file. + +// Implementation of class OrderAccess. 
-+#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ -+ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); + -+inline void OrderAccess::loadload() { inlasm_sync(); } -+inline void OrderAccess::storestore() { inlasm_sync(); } -+inline void OrderAccess::loadstore() { inlasm_sync(); } -+inline void OrderAccess::storeload() { inlasm_sync(); } ++inline void OrderAccess::loadload() { inlasm_sync(0x15); } ++inline void OrderAccess::storestore() { inlasm_sync(0x1a); } ++inline void OrderAccess::loadstore() { inlasm_sync(0x16); } ++inline void OrderAccess::storeload() { inlasm_sync(0x19); } + -+inline void OrderAccess::acquire() { inlasm_sync(); } -+inline void OrderAccess::release() { inlasm_sync(); } -+inline void OrderAccess::fence() { inlasm_sync(); } ++inline void OrderAccess::acquire() { inlasm_sync(0x14); } ++inline void OrderAccess::release() { inlasm_sync(0x12); } ++inline void OrderAccess::fence() { inlasm_sync(0x10); } + + +#undef inlasm_sync @@ -109083,10 +109086,10 @@ index 44a5bcbe54..114b155f92 100644 void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); void ic_call( LIR_OpJavaCall* op); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp -index f4b156d59b..fc35f02772 100644 +index 88f6d30697..1d5a6668ea 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp -@@ -479,13 +479,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, +@@ -480,13 +480,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { CodeStub* stub = new RangeCheckStub(range_check_info, index, array); if (index->is_constant()) { @@ -109104,7 +109107,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -493,12 +491,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, +@@ -494,12 +492,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index); if (index->is_constant()) { @@ -109121,7 +109124,7 @@ index f4b156d59b..fc35f02772 100644 } __ move(index, result); } -@@ -934,7 +931,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { +@@ -935,7 +932,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { return tmp; } @@ -109130,7 +109133,7 @@ index f4b156d59b..fc35f02772 100644 if (if_instr->should_profile()) { ciMethod* method = if_instr->profiled_method(); assert(method != NULL, "method should be set if branch is profiled"); -@@ -955,10 +952,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { +@@ -956,10 +953,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ metadata2reg(md->constant_encoding(), md_reg); LIR_Opr data_offset_reg = new_pointer_register(); @@ -109152,7 +109155,7 @@ index f4b156d59b..fc35f02772 100644 // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
LIR_Opr data_reg = new_pointer_register(); -@@ -1315,8 +1319,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { +@@ -1316,8 +1320,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { } __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); @@ -109163,7 +109166,7 @@ index f4b156d59b..fc35f02772 100644 } -@@ -1598,8 +1602,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { +@@ -1599,8 +1603,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { if (GenerateRangeChecks && needs_range_check) { if (use_length) { @@ -109174,7 +109177,7 @@ index f4b156d59b..fc35f02772 100644 } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // range_check also does the null check -@@ -1777,12 +1781,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { +@@ -1778,12 +1782,9 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeEmitInfo* info = state_for(x); CodeStub* stub = new RangeCheckStub(info, index.result()); if (index.result()->is_constant()) { @@ -109189,7 +109192,7 @@ index f4b156d59b..fc35f02772 100644 } __ move(index.result(), result); } else { -@@ -1860,8 +1861,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { +@@ -1861,8 +1862,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } else if (use_length) { // TODO: use a (modified) version of array_range_check that does not require a // constant length to be loaded to a register @@ -109200,7 +109203,7 @@ index f4b156d59b..fc35f02772 100644 } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // The range check performs the null check, so clear it out for the load -@@ -2234,19 +2235,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi +@@ -2235,19 +2236,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); if (low_key == high_key) { @@ -109225,7 +109228,7 @@ index f4b156d59b..fc35f02772 100644 __ branch_destination(L->label()); } } -@@ -2346,12 +2342,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { +@@ -2347,12 +2343,11 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); @@ -109242,7 +109245,7 @@ index f4b156d59b..fc35f02772 100644 } LIR_Opr data_reg = new_pointer_register(); -@@ -2365,8 +2360,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { +@@ -2366,8 +2361,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); } else { for (int i = 0; i < len; i++) { @@ -109252,7 +109255,7 @@ index f4b156d59b..fc35f02772 100644 } __ jump(x->default_sux()); } -@@ -2404,12 +2398,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { +@@ -2405,12 +2399,11 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { __ move(LIR_OprFact::intptrConst(default_count_offset), data_offset_reg); for (int i = 0; i < len; i++) { int count_offset = md->byte_offset_of_slot(data, MultiBranchData::case_count_offset(i)); @@ -109269,7 +109272,7 @@ index f4b156d59b..fc35f02772 100644 } LIR_Opr data_reg = new_pointer_register(); -@@ -2424,8 +2417,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { +@@ -2425,8 +2418,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { } else { int len = 
x->length(); for (int i = 0; i < len; i++) { @@ -109279,7 +109282,7 @@ index f4b156d59b..fc35f02772 100644 } __ jump(x->default_sux()); } -@@ -2935,8 +2927,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { +@@ -2936,8 +2928,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { f_val.dont_load_item(); LIR_Opr reg = rlock_result(x); @@ -109290,7 +109293,7 @@ index f4b156d59b..fc35f02772 100644 } #ifdef JFR_HAVE_INTRINSICS -@@ -2980,8 +2972,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { +@@ -2981,8 +2973,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { __ move(LIR_OprFact::oopConst(NULL), result); LIR_Opr jobj = new_register(T_METADATA); __ move_wide(jobj_addr, jobj); @@ -109300,7 +109303,7 @@ index f4b156d59b..fc35f02772 100644 access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(jobj, T_OBJECT)), result); -@@ -3286,21 +3277,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { +@@ -3287,21 +3278,24 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { void LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { if (compilation()->count_backedges()) { @@ -109331,7 +109334,7 @@ index f4b156d59b..fc35f02772 100644 increment_backedge_counter(info, step, bci); } } -@@ -3339,8 +3333,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { +@@ -3340,8 +3334,7 @@ void LIRGenerator::decrement_age(CodeEmitInfo* info) { // DeoptimizeStub will reexecute from the current state in code info. CodeStub* deopt = new DeoptimizeStub(info, Deoptimization::Reason_tenured, Deoptimization::Action_make_not_entrant); @@ -109341,7 +109344,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -3386,8 +3379,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, +@@ -3387,8 +3380,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, int freq = frequency << InvocationCounter::count_shift; if (freq == 0) { if (!step->is_constant()) { @@ -109351,7 +109354,7 @@ index f4b156d59b..fc35f02772 100644 } else { __ branch(lir_cond_always, T_ILLEGAL, overflow); } -@@ -3395,12 +3387,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, +@@ -3396,12 +3388,11 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr mask = load_immediate(freq, T_INT); if (!step->is_constant()) { // If step is 0, make sure the overflow check below always fails @@ -109367,7 +109370,7 @@ index f4b156d59b..fc35f02772 100644 } __ branch_destination(overflow->continuation()); } -@@ -3513,8 +3504,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { +@@ -3514,8 +3505,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { CodeEmitInfo *info = state_for(x, x->state()); CodeStub* stub = new PredicateFailedStub(info); @@ -109377,7 +109380,7 @@ index f4b156d59b..fc35f02772 100644 } } -@@ -3661,8 +3651,8 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& +@@ -3662,8 +3652,8 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); int diffbit = Klass::layout_helper_boolean_diffbit(); __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); @@ -109415,7 +109418,7 @@ index 3ad325d759..f377b27859 100644 ciMethod *method, LIR_Opr step, int frequency, int bci, bool backedge, bool notify); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp 
b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd99..4e7df88102 100644 +index acc969ac9c..1637965613 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -35,6 +35,12 @@ @@ -109455,7 +109458,7 @@ index c28055fd99..4e7df88102 100644 default: break; } -@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { +@@ -3350,7 +3373,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); @@ -109466,7 +109469,7 @@ index c28055fd99..4e7df88102 100644 // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. check_live = false; -@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi +@@ -6200,6 +6225,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; @@ -109483,7 +109486,7 @@ index c28055fd99..4e7df88102 100644 if (branch->block() == target_from) { branch->change_block(target_to); } -@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6328,6 +6363,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { } } } @@ -109504,7 +109507,7 @@ index c28055fd99..4e7df88102 100644 } } } -@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { +@@ -6403,6 +6452,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); } @@ -109518,7 +109521,7 @@ index c28055fd99..4e7df88102 100644 } for (j = 0; j < block->number_of_sux() - 1; j++) { -@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { +@@ -6647,6 +6703,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } @@ -109544,7 +109547,7 @@ index c28055fd99..4e7df88102 100644 case lir_add: case lir_sub: diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp -index 747971af41..093831ac09 100644 +index 6bc63116bd..41c0a0ea3d 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -22,6 +22,12 @@ @@ -109560,7 +109563,7 @@ index 747971af41..093831ac09 100644 #include "precompiled.hpp" #include "jvm.h" #include "code/codeCache.hpp" -@@ -2155,7 +2161,8 @@ void nmethod::verify_scopes() { +@@ -2159,7 +2165,8 @@ void nmethod::verify_scopes() { //verify_interrupt_point(iter.addr()); break; case relocInfo::runtime_call_type: @@ -109570,7 +109573,7 @@ index 747971af41..093831ac09 100644 address destination = iter.reloc()->value(); // Right now there is no way to find out which entries support // an interrupt point. It would be nice if we had this -@@ -2392,7 +2399,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { +@@ -2396,7 +2403,8 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { return st.as_string(); } case relocInfo::runtime_call_type: @@ -109666,7 +109669,7 @@ index 57931a1a6a..fb56fd3ab1 100644 // Trampoline Relocations. 
// A trampoline allows to encode a small branch in the code, even if there diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp -index 7892cc85b3..3637aefe10 100644 +index 3c986f40ff..23d07f0505 100644 --- a/src/hotspot/share/code/vtableStubs.cpp +++ b/src/hotspot/share/code/vtableStubs.cpp @@ -22,6 +22,12 @@ @@ -109954,7 +109957,7 @@ index 9f8ce74243..3c1862d826 100644 } diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp -index 24e4c98175..a6b310290d 100644 +index 6483159136..f40e304f9a 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -22,6 +22,12 @@ @@ -109970,7 +109973,7 @@ index 24e4c98175..a6b310290d 100644 #include "precompiled.hpp" #include "classfile/javaClasses.inline.hpp" #include "classfile/systemDictionary.hpp" -@@ -1506,7 +1512,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth +@@ -1497,7 +1503,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth // preparing the same method will be sure to see non-null entry & mirror. IRT_END @@ -110114,10 +110117,10 @@ index 8927063330..b5bb5c2887 100644 #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp -index 80958b0469..08d13a4189 100644 +index c3a884fafe..103789d9b1 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp -@@ -1082,12 +1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1083,12 +1083,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; @@ -110132,7 +110135,7 @@ index 80958b0469..08d13a4189 100644 ReservedSpace metaspace_rs; -@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1114,7 +1114,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // below 32g to get a zerobased CCS. For simplicity we reuse the search // strategy for AARCH64. @@ -110142,7 +110145,7 @@ index 80958b0469..08d13a4189 100644 for (char *a = align_up(requested_addr, increment); a < (char*)(1024*G); a += increment) { -@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1145,7 +1146,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } @@ -110300,8 +110303,43 @@ index 84123b29ec..77fbacf2d8 100644 #include "services/memTracker.hpp" #include "utilities/dtrace.hpp" #include "utilities/globalDefinitions.hpp" +diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp +index ce23aafa8f..d3dfb74d5b 100644 +--- a/src/hotspot/share/runtime/objectMonitor.cpp ++++ b/src/hotspot/share/runtime/objectMonitor.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023, These ++ * modifications are Copyright (c) 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "classfile/vmSymbols.hpp" + #include "jfr/jfrEvents.hpp" +@@ -308,6 +314,9 @@ void ObjectMonitor::enter(TRAPS) { + } + + assert(_owner != Self, "invariant"); ++ // Thread _succ != current assertion load reording before Thread if (_succ == current) _succ = nullptr. ++ // But expect order is firstly if (_succ == current) _succ = nullptr then _succ != current assertion. ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(Self->is_Java_thread(), "invariant"); + JavaThread * jt = (JavaThread *) Self; +@@ -469,6 +478,7 @@ void ObjectMonitor::EnterI(TRAPS) { + } + + // The Spin failed -- Enqueue and park the thread ... ++ DEBUG_ONLY(LOONGARCH64_ONLY(__asm__ __volatile__ ("dbar 0x700\n");)MIPS64_ONLY(OrderAccess::loadload();)) + assert(_succ != Self, "invariant"); + assert(_owner != Self, "invariant"); + assert(_Responsible != Self, "invariant"); diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index e0f4a2af1f..09cc4b1ba5 100644 +index 1c540bb621..0e44240d40 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -110381,7 +110419,7 @@ index c758fc5743..a8c4638f6a 100644 inline bool is_even(intx x) { return !is_odd(x); } diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp -index cf80253868..f611daf36d 100644 +index 6605ab367c..5a2be6ef15 100644 --- a/src/hotspot/share/utilities/macros.hpp +++ b/src/hotspot/share/utilities/macros.hpp @@ -22,6 +22,12 @@ @@ -110397,7 +110435,7 @@ index cf80253868..f611daf36d 100644 #ifndef SHARE_VM_UTILITIES_MACROS_HPP #define SHARE_VM_UTILITIES_MACROS_HPP -@@ -531,6 +537,38 @@ +@@ -535,6 +541,38 @@ #define NOT_SPARC(code) code #endif @@ -110436,7 +110474,7 @@ index cf80253868..f611daf36d 100644 #if defined(PPC32) || defined(PPC64) #ifndef PPC #define PPC -@@ -623,16 +661,34 @@ +@@ -627,16 +665,34 @@ // OS_CPU_HEADER(vmStructs) --> vmStructs_linux_sparc.hpp // // basename.hpp / basename.inline.hpp @@ -110635,7 +110673,7 @@ index 8318e8e021..07064e76ee 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index de5254d859..eefe55959c 100644 +index c22b5d1cb3..36d6343960 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -110651,12 +110689,12 @@ index de5254d859..eefe55959c 100644 #include #include #include -@@ -142,7 +148,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use - #define PTRACE_GETREGS_REQ PT_GETREGS - #endif - --#ifdef PTRACE_GETREGS_REQ -+#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) +@@ -151,7 +157,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + return false; + } + return true; +-#elif defined(PTRACE_GETREGS_REQ) ++#elif defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) 
failed for lwp %d\n", pid); return false; @@ -116548,7 +116586,7 @@ index 127bb6abcd..c9277604ae 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index 77458554b7..05aee6b84c 100644 +index 126a43a900..55bd135f6e 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -116588,35 +116626,8 @@ index 7990c49a1f..025048c6b0 100644 } } } -diff --git a/test/jdk/sun/security/pkcs11/PKCS11Test.java b/test/jdk/sun/security/pkcs11/PKCS11Test.java -index 5d4ee095f7..de622b128b 100644 ---- a/test/jdk/sun/security/pkcs11/PKCS11Test.java -+++ b/test/jdk/sun/security/pkcs11/PKCS11Test.java -@@ -21,6 +21,12 @@ - * questions. - */ - -+/* -+ * This file has been modified by Loongson Technology in 2022, These -+ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - // common infrastructure for SunPKCS11 tests - - import java.io.BufferedReader; -@@ -732,6 +738,9 @@ public abstract class PKCS11Test { - "/usr/lib64/" }); - osMap.put("Linux-ppc64-64", new String[] { "/usr/lib64/" }); - osMap.put("Linux-ppc64le-64", new String[] { "/usr/lib64/" }); -+ osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); -+ osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", -+ "/usr/lib64/" }); - osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); - osMap.put("Windows-x86-32", new String[] {}); - osMap.put("Windows-amd64-64", new String[] {}); diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index f4ee0546c7..a600d15b61 100644 +index 6269373c2b..440ec4664f 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -116631,8 +116642,8 @@ index f4ee0546c7..a600d15b61 100644 + package jdk.test.lib; - import java.io.FileNotFoundException; -@@ -226,6 +232,14 @@ public class Platform { + import java.io.BufferedReader; +@@ -229,6 +235,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } diff --git a/ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch b/ZGC-AArch64-Optimizations-and-Fixes.patch similarity index 76% rename from ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch rename to ZGC-AArch64-Optimizations-and-Fixes.patch index 58ed16d66c5739eb962ca2d04319a5ceef9cc576..0fa61a5978ddb46551c8d1fe7b0fe64cfb13d336 100644 --- a/ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch +++ b/ZGC-AArch64-Optimizations-and-Fixes.patch @@ -1,16 +1,108 @@ +From 1932790364789c601d463a4de8f757cf604344c0 Mon Sep 17 00:00:00 2001 + +--- + make/hotspot/gensrc/GensrcAdlc.gmk | 6 + + src/hotspot/cpu/aarch64/aarch64.ad | 207 +- + .../gc/z/zBarrierSetAssembler_aarch64.cpp | 246 ++- + .../gc/z/zBarrierSetAssembler_aarch64.hpp | 26 +- + src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad | 268 +++ + .../cpu/aarch64/macroAssembler_aarch64.cpp | 61 + + .../cpu/aarch64/macroAssembler_aarch64.hpp | 6 + + .../templateInterpreterGenerator_aarch64.cpp | 4 +- + .../cpu/aarch64/vm_version_aarch64.hpp | 8 + + .../cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp | 404 +++- + .../cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp | 30 +- + src/hotspot/cpu/x86/gc/z/z_x86_64.ad | 168 ++ + src/hotspot/cpu/x86/x86.ad | 136 +- + 
src/hotspot/cpu/x86/x86_64.ad | 437 +---- + .../gc/z/zBackingFile_linux_aarch64.cpp | 2 +- + src/hotspot/share/adlc/formssel.cpp | 8 - + src/hotspot/share/c1/c1_Instruction.cpp | 1 + + src/hotspot/share/ci/ciInstanceKlass.cpp | 44 + + src/hotspot/share/classfile/vmSymbols.hpp | 4 + + .../share/compiler/compilerDirectives.hpp | 3 +- + .../share/gc/shared/c2/barrierSetC2.cpp | 73 +- + .../share/gc/shared/c2/barrierSetC2.hpp | 15 +- + src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp | 1657 +++-------------- + src/hotspot/share/gc/z/c2/zBarrierSetC2.hpp | 181 +- + .../share/gc/z/zBarrierSetAssembler.hpp | 5 +- + src/hotspot/share/gc/z/zGlobals.hpp | 7 +- + src/hotspot/share/gc/z/zHeap.cpp | 5 + + src/hotspot/share/gc/z/zLiveMap.cpp | 20 +- + src/hotspot/share/gc/z/zLiveMap.inline.hpp | 9 +- + src/hotspot/share/gc/z/zMarkStack.cpp | 74 +- + src/hotspot/share/gc/z/zMarkStack.hpp | 1 + + src/hotspot/share/gc/z/zWorkers.cpp | 23 +- + src/hotspot/share/gc/z/zWorkers.hpp | 2 - + src/hotspot/share/gc/z/z_globals.hpp | 6 +- + src/hotspot/share/opto/c2compiler.cpp | 1 + + src/hotspot/share/opto/classes.cpp | 3 - + src/hotspot/share/opto/classes.hpp | 11 - + src/hotspot/share/opto/compile.cpp | 52 +- + src/hotspot/share/opto/compile.hpp | 25 +- + src/hotspot/share/opto/escape.cpp | 15 - + src/hotspot/share/opto/graphKit.cpp | 10 +- + src/hotspot/share/opto/graphKit.hpp | 10 +- + src/hotspot/share/opto/lcm.cpp | 1 - + src/hotspot/share/opto/library_call.cpp | 17 + + src/hotspot/share/opto/loopnode.cpp | 1 - + src/hotspot/share/opto/loopopts.cpp | 3 - + src/hotspot/share/opto/machnode.hpp | 9 +- + src/hotspot/share/opto/matcher.cpp | 45 +- + src/hotspot/share/opto/memnode.cpp | 14 +- + src/hotspot/share/opto/memnode.hpp | 53 +- + src/hotspot/share/opto/node.cpp | 7 - + src/hotspot/share/opto/node.hpp | 6 - + src/hotspot/share/opto/output.cpp | 424 +++-- + src/hotspot/share/opto/output.hpp | 5 +- + src/hotspot/share/opto/parse1.cpp | 1 + + src/hotspot/share/opto/phaseX.cpp | 8 +- + src/hotspot/share/opto/vectornode.cpp | 1 - + src/hotspot/share/runtime/sharedRuntime.cpp | 2 + + src/hotspot/share/runtime/sharedRuntime.hpp | 5 + + src/hotspot/share/utilities/bitMap.hpp | 17 +- + src/hotspot/share/utilities/bitMap.inline.hpp | 34 +- + .../share/classes/java/util/Random.java | 2 + + .../runtime/MemberName/MemberNameLeak.java | 1 + + 63 files changed, 1941 insertions(+), 2989 deletions(-) + create mode 100644 src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad + create mode 100644 src/hotspot/cpu/x86/gc/z/z_x86_64.ad + +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index c5a3ac572..2af2f9ac4 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -150,6 +150,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) + $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + ++ ifeq ($(call check-jvm-feature, zgc), true) ++ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/z/z_$(HOTSPOT_TARGET_CPU).ad \ ++ ))) ++ endif ++ + ifeq ($(call check-jvm-feature, shenandoahgc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index af3e593a9..05b36e279 100644 +index a8976d5d4..b253e823a 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ 
b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1131,6 +1131,7 @@ definitions %{ - source_hpp %{ - - #include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" - +@@ -1142,12 +1142,6 @@ definitions %{ + int_def VOLATILE_REF_COST ( 1000, 10 * INSN_COST); %} -@@ -2501,17 +2502,7 @@ void Compile::reshape_address(AddPNode* addp) { +-source_hpp %{ +- +-#include "gc/z/c2/zBarrierSetC2.hpp" +- +-%} +- + //----------SOURCE BLOCK------------------------------------------------------- + // This is a block of C++ code which provides values, functions, and + // definitions necessary in the rest of the architecture description +@@ -2525,17 +2519,7 @@ void Compile::reshape_address(AddPNode* addp) { __ INSN(REG, as_Register(BASE)); \ } @@ -28,10 +120,10 @@ index af3e593a9..05b36e279 100644 +static Address mem2address(int opcode, Register base, int index, int size, int disp) { Address::extend scale; - -@@ -2409,13 +2409,18 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + +@@ -2554,13 +2538,18 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } - + if (index == -1) { - (masm.*insn)(reg, Address(base, disp)); + return Address(base, disp); @@ -41,7 +133,7 @@ index af3e593a9..05b36e279 100644 + return Address(base, as_Register(index), scale); } } - + +typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr); +typedef void (MacroAssembler::* mem_insn2)(Register Rt, Register adr); +typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr); @@ -50,10 +142,10 @@ index af3e593a9..05b36e279 100644 static void loadStore(MacroAssembler masm, mem_insn insn, Register reg, int opcode, Register base, int index, int size, int disp, -@@ -2450,9 +2455,20 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, +@@ -2595,9 +2584,20 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } } - + + // Used for all non-volatile memory accesses. The use of + // $mem->opcode() to discover whether this pattern uses sign-extended + // offsets is something of a kludge. 
@@ -72,10 +164,10 @@ index af3e593a9..05b36e279 100644 + Register base, int index, int size, int disp) { Address::extend scale; - -@@ -2474,8 +2490,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, + +@@ -2619,8 +2619,8 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt, } - + static void loadStore(MacroAssembler masm, mem_vector_insn insn, - FloatRegister reg, MacroAssembler::SIMD_RegVariant T, - int opcode, Register base, int index, int size, int disp) @@ -84,7 +176,7 @@ index af3e593a9..05b36e279 100644 { if (index == -1) { (masm.*insn)(reg, T, Address(base, disp)); -@@ -3797,7 +3805,7 @@ frame %{ +@@ -3921,7 +3921,7 @@ frame %{ static const int hi[Op_RegL + 1] = { // enum name 0, // Op_Node 0, // Op_Set @@ -93,7 +185,7 @@ index af3e593a9..05b36e279 100644 OptoReg::Bad, // Op_RegI R0_H_num, // Op_RegP OptoReg::Bad, // Op_RegF -@@ -6929,7 +6937,7 @@ instruct loadRange(iRegINoSp dst, memory mem) +@@ -7075,7 +7075,7 @@ instruct loadRange(iRegINoSp dst, memory mem) instruct loadP(iRegPNoSp dst, memory mem) %{ match(Set dst (LoadP mem)); @@ -102,7 +194,7 @@ index af3e593a9..05b36e279 100644 ins_cost(4 * INSN_COST); format %{ "ldr $dst, $mem\t# ptr" %} -@@ -7622,6 +7630,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) +@@ -7768,6 +7768,7 @@ instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem) instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) %{ match(Set dst (LoadP mem)); @@ -110,7 +202,7 @@ index af3e593a9..05b36e279 100644 ins_cost(VOLATILE_REF_COST); format %{ "ldar $dst, $mem\t# ptr" %} -@@ -8506,6 +8515,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS +@@ -8652,6 +8653,7 @@ instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoS instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapP mem (Binary oldval newval))); @@ -118,7 +210,7 @@ index af3e593a9..05b36e279 100644 ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); -@@ -8619,7 +8629,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL +@@ -8765,7 +8767,7 @@ instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegL instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -127,7 +219,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); -@@ -8750,6 +8760,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne +@@ -8896,6 +8898,7 @@ instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN ne %} instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -135,7 +227,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); -@@ -8849,7 +8860,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN +@@ -8995,7 +8998,7 @@ instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN %} instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -144,7 +236,7 @@ index af3e593a9..05b36e279 100644 match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); -@@ -8950,6 +8961,7 @@ 
instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne +@@ -9096,6 +9099,7 @@ instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN ne %} instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -152,7 +244,7 @@ index af3e593a9..05b36e279 100644 match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); -@@ -9057,8 +9069,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN +@@ -9203,8 +9207,8 @@ instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN %} instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ @@ -162,7 +254,7 @@ index af3e593a9..05b36e279 100644 ins_cost(VOLATILE_REF_COST); effect(KILL cr); format %{ -@@ -9108,6 +9120,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ +@@ -9254,6 +9258,7 @@ instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ %} instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{ @@ -170,7 +262,7 @@ index af3e593a9..05b36e279 100644 match(Set prev (GetAndSetP mem newv)); ins_cost(2 * VOLATILE_REF_COST); format %{ "atomic_xchg $prev, $newv, [$mem]" %} -@@ -9151,7 +9164,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ +@@ -9297,7 +9302,7 @@ instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ %} instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{ @@ -179,30 +271,20 @@ index af3e593a9..05b36e279 100644 match(Set prev (GetAndSetP mem newv)); ins_cost(VOLATILE_REF_COST); format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -@@ -17477,145 +17490,238 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ - - source %{ +@@ -18518,150 +18523,6 @@ instruct vpopcount2I(vecD dst, vecD src) %{ + ins_pipe(pipe_class_default); + %} +-source %{ +- -#include "gc/z/zBarrierSetAssembler.hpp" -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(tmp, tmp, ref); -+ __ cbnz(tmp, *stub->entry()); -+ __ bind(*stub->continuation()); -+} - +- -static void z_load_barrier_slow_reg(MacroAssembler& _masm, Register dst, - Register base, int index, int scale, - int disp, bool weak) { - const address stub = weak ? 
ZBarrierSet::assembler()->load_barrier_weak_slow_stub(dst) - : ZBarrierSet::assembler()->load_barrier_slow_stub(dst); -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ b(*stub->entry()); -+ __ bind(*stub->continuation()); -+} - +- - if (index == -1) { - if (disp != 0) { - __ lea(dst, Address(base, disp)); @@ -216,53 +298,14 @@ index af3e593a9..05b36e279 100644 - } else { - __ lea(dst, Address(base, disp)); - __ lea(dst, Address(dst, index_reg, Address::lsl(scale))); -+%} -+ -+// Load Pointer -+instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); -+ -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); - } +- } - } -+ %} - +- - __ far_call(RuntimeAddress(stub)); -} -+ ins_pipe(iload_reg_mem); -+%} -+ -+// Load Weak Pointer -+instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(4 * INSN_COST); - -+ format %{ "ldr $dst, $mem" %} -+ -+ ins_encode %{ -+ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); -+ __ ldr($dst$$Register, ref_addr); -+ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); -+ %} -+ -+ ins_pipe(iload_reg_mem); - %} - +- +-%} +- -// -// Execute ZGC load barrier (strong) slow path -// @@ -288,28 +331,10 @@ index af3e593a9..05b36e279 100644 - ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $mem$$base$$Register, - $mem$$index, $mem$$scale, $mem$$disp, false); -+// Load Pointer Volatile -+instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) -+%{ -+ match(Set dst (LoadP mem)); -+ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP dst, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "ldar $dst, $mem\t" %} -+ -+ ins_encode %{ -+ __ ldar($dst$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe(pipe_slow); -+ -+ ins_pipe(pipe_serial); - %} - +-%} +- -// -// Execute ZGC load barrier (weak) slow path -// @@ -335,83 +360,30 @@ index af3e593a9..05b36e279 100644 - ins_encode %{ - z_load_barrier_slow_reg(_masm, $dst$$Register, $mem$$base$$Register, - $mem$$index, $mem$$scale, $mem$$disp, true); -+instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 
ZLoadBarrierStrong); -+ effect(KILL cr, TEMP_DEF res); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } - %} -+ - ins_pipe(pipe_slow); - %} - +- %} +- ins_pipe(pipe_slow); +-%} +- -// Specialized versions of compareAndExchangeP that adds a keepalive that is consumed -// but doesn't affect output. -+instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(KILL cr, TEMP_DEF res); - +- -instruct z_compareAndExchangeP(iRegPNoSp res, indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, - rFlagsReg cr) %{ - match(Set res (ZCompareAndExchangeP (Binary mem keepalive) (Binary oldval newval))); - ins_cost(2 * VOLATILE_REF_COST); +- ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, KILL cr); - format %{ - "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" - %} -+ -+ format %{ "cmpxchg $mem, $oldval, $newval\n\t" -+ "cset $res, EQ" %} -+ - ins_encode %{ +- ins_encode %{ - __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, - Assembler::xword, /*acquire*/ false, /*release*/ true, - /*weak*/ false, $res$$Register); -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, rscratch2); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, rscratch2); -+ __ cset($res$$Register, Assembler::EQ); -+ __ bind(good); -+ } - %} -+ - ins_pipe(pipe_slow); - %} - +- %} +- ins_pipe(pipe_slow); +-%} +- -instruct z_compareAndSwapP(iRegINoSp res, - indirect mem, - iRegP oldval, iRegP newval, iRegP keepalive, @@ -419,16 +391,11 @@ index af3e593a9..05b36e279 100644 - - match(Set res (ZCompareAndSwapP (Binary mem keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem keepalive) (Binary oldval 
newval))); -+instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); - - ins_cost(2 * VOLATILE_REF_COST); - +- +- ins_cost(2 * VOLATILE_REF_COST); +- - effect(KILL cr); -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} - +- - format %{ - "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval" - "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -436,93 +403,25 @@ index af3e593a9..05b36e279 100644 - - ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval), - aarch64_enc_cset_eq(res)); -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ false /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} - - ins_pipe(pipe_slow); - %} - -+instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ -+ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF res, KILL cr); -+ -+ ins_cost(2 * VOLATILE_REF_COST); -+ -+ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} - +- +- ins_pipe(pipe_slow); +-%} +- +- -instruct z_get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev, - iRegP keepalive) %{ - match(Set prev (ZGetAndSetP mem (Binary newv keepalive))); -+ ins_encode %{ -+ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); -+ __ andr(rscratch1, rscratch1, $res$$Register); -+ __ cbz(rscratch1, good); -+ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); -+ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, -+ true /* acquire */, true /* release */, false /* weak */, $res$$Register); -+ __ bind(good); -+ } -+ %} -+ -+ ins_pipe(pipe_slow); -+%} -+ -+instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(TEMP_DEF prev, KILL cr); - - ins_cost(2 * VOLATILE_REF_COST); -+ - format %{ "atomic_xchg $prev, $newv, [$mem]" %} -+ - ins_encode %{ +- +- ins_cost(2 * VOLATILE_REF_COST); +- format %{ "atomic_xchg $prev, $newv, [$mem]" %} +- ins_encode %{ - __ 
atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); -+ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } -+ %} -+ -+ ins_pipe(pipe_serial); -+%} -+ -+instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ -+ match(Set prev (GetAndSetP mem newv)); -+ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); -+ effect(TEMP_DEF prev, KILL cr); -+ -+ ins_cost(VOLATILE_REF_COST); -+ -+ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} -+ -+ ins_encode %{ -+ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); -+ } - %} - ins_pipe(pipe_serial); - %} +- %} +- ins_pipe(pipe_serial); +-%} + + //----------PEEPHOLE RULES----------------------------------------------------- + // These must follow all instruction definitions as they use the names diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp index 8e169ace4..787c0c1af 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -933,11 +832,285 @@ index 7e8be01cc..cca873825 100644 }; #endif // CPU_AARCH64_GC_Z_ZBARRIERSETASSEMBLER_AARCH64_HPP +diff --git a/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +new file mode 100644 +index 000000000..50cc6f924 +--- /dev/null ++++ b/src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad +@@ -0,0 +1,268 @@ ++// ++// Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++ ++source_hpp %{ ++ ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); ++ __ ldr(tmp, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(tmp, tmp, ref); ++ __ cbnz(tmp, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); ++ __ b(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(iRegPNoSp dst, memory mem, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierStrong)); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(4 * INSN_COST); ++ ++ format %{ "ldr $dst, $mem" %} ++ ++ ins_encode %{ ++ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ __ ldr($dst$$Register, ref_addr); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Weak Pointer ++instruct zLoadWeakP(iRegPNoSp dst, memory mem, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && !needs_acquiring_load(n) && (n->as_Load()->barrier_data() == ZLoadBarrierWeak)); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(4 * INSN_COST); ++ ++ format %{ "ldr $dst, $mem" %} ++ ++ ins_encode %{ ++ const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ __ ldr($dst$$Register, ref_addr); ++ z_load_barrier(_masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, true /* weak */); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Pointer Volatile ++instruct zLoadPVolatile(iRegPNoSp dst, indirect mem /* sync_memory */, rFlagsReg cr) ++%{ ++ match(Set dst (LoadP mem)); ++ predicate(UseZGC && needs_acquiring_load(n) && n->as_Load()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP dst, KILL cr); ++ ++ ins_cost(VOLATILE_REF_COST); ++ ++ format %{ "ldar $dst, $mem\t" %} ++ ++ ins_encode %{ ++ __ ldar($dst$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address($mem$$Register), $dst$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct zCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval\n\t" ++ "cset $res, EQ" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak 
*/, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, rscratch2); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(KILL cr, TEMP_DEF res); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $mem, $oldval, $newval\n\t" ++ "cset $res, EQ" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, rscratch2); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), rscratch2 /* ref */, rscratch1 /* tmp */ ); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, rscratch2); ++ __ cset($res$$Register, Assembler::EQ); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, $res$$Register); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ false /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zCompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && 
n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF res, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "cmpxchg $res = $mem, $oldval, $newval" %} ++ ++ ins_encode %{ ++ guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ ldr(rscratch1, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ andr(rscratch1, rscratch1, $res$$Register); ++ __ cbz(rscratch1, good); ++ z_load_barrier_slow_path(_masm, this, Address($mem$$Register), $res$$Register /* ref */, rscratch1 /* tmp */); ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, ++ true /* acquire */, true /* release */, false /* weak */, $res$$Register); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct zGetAndSetP(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(2 * VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]" %} ++ ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct zGetAndSetPAcq(indirect mem, iRegP newv, iRegPNoSp prev, rFlagsReg cr) %{ ++ match(Set prev (GetAndSetP mem newv)); ++ predicate(UseZGC && needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong)); ++ effect(TEMP_DEF prev, KILL cr); ++ ++ ins_cost(VOLATILE_REF_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]" %} ++ ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, $mem$$Register); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $prev$$Register, rscratch2 /* tmp */, false /* weak */); ++ } ++ %} ++ ins_pipe(pipe_serial); ++%} ++ diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index d24ba97e1..08d39c4bd 100644 +index 7f329a45d..5ddf049ce 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2096,6 +2096,65 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { +@@ -2129,6 +2129,67 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { return count; } @@ -945,57 +1118,59 @@ index d24ba97e1..08d39c4bd 100644 +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -+ int words_pushed = 0; -+ + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; ++ int i = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } -+ regs[count++] = zr->encoding_nocheck(); -+ count &= ~1; // Only push an even number of regs + -+ // Always pushing full 128 bit registers. 
-+ if (count) { -+ stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -+ words_pushed += 2; ++ if (!count) { ++ return 0; ++ } ++ ++ add(stack, stack, -count * wordSize * 2); ++ ++ if (count & 1) { ++ strq(as_FloatRegister(regs[0]), Address(stack)); ++ i += 1; + } -+ for (int i = 2; i < count; i += 2) { ++ ++ for (; i < count; i += 2) { + stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -+ words_pushed += 2; + } + -+ assert(words_pushed == count, "oops, pushed != count"); + return count; +} + +int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -+ int words_pushed = 0; -+ + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; ++ int i = 0; + for (int reg = 0; reg <= 31; reg++) { + if (1 & bitset) + regs[count++] = reg; + bitset >>= 1; + } -+ regs[count++] = zr->encoding_nocheck(); -+ count &= ~1; + -+ for (int i = 2; i < count; i += 2) { -+ ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -+ words_pushed += 2; ++ if (!count) { ++ return 0; + } -+ if (count) { -+ ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -+ words_pushed += 2; ++ ++ if (count & 1) { ++ ldrq(as_FloatRegister(regs[0]), Address(stack)); ++ i += 1; ++ } ++ ++ for (; i < count; i += 2) { ++ ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); + } + -+ assert(words_pushed == count, "oops, pushed != count"); ++ add(stack, stack, count * wordSize * 2); + + return count; +} @@ -1004,30 +1179,30 @@ index d24ba97e1..08d39c4bd 100644 void MacroAssembler::verify_heapbase(const char* msg) { #if 0 diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index edcfd9ceb..60b728e94 100644 +index 01fdf16a0..073854d2b 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -@@ -462,12 +462,18 @@ private: +@@ -463,12 +463,18 @@ private: int push(unsigned int bitset, Register stack); int pop(unsigned int bitset, Register stack); - + + int push_fp(unsigned int bitset, Register stack); + int pop_fp(unsigned int bitset, Register stack); + void mov(Register dst, Address a); - + public: void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } - + + void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } + void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } + static RegSet call_clobbered_registers(); - + // Push and pop everything that might be clobbered by a native diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp -index c162024db..6e4eb1a7a 100644 +index 21ba661ea..430f3ee14 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -880,8 +880,8 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { @@ -1041,11 +1216,43 @@ index c162024db..6e4eb1a7a 100644 } __ ldr(rcpool, Address(rmethod, Method::const_offset())); +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +index 
8c9676aed..e417f07be 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +@@ -75,6 +75,7 @@ public: + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', ++ CPU_HISILICON = 'H', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', +@@ -107,6 +108,13 @@ public: + static int cpu_variant() { return _variant; } + static int cpu_revision() { return _revision; } + ++ static bool is_hisi_enabled() { ++ if (_cpu == CPU_HISILICON && (_model == 0xd01 || _model == 0xd02 || _model == 0xd03)) { ++ return true; ++ } ++ return false; ++ } ++ + static bool is_zva_enabled() { return 0 <= _zva_length; } + static int zva_length() { + assert(is_zva_enabled(), "ZVA not available"); diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index 381211ecc..d88ecf7b8 100644 +index f5de1ed88..4428e96bc 100644 --- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -27,16 +27,16 @@ +@@ -24,21 +24,22 @@ + #include "precompiled.hpp" + #include "asm/macroAssembler.inline.hpp" + #include "code/codeBlob.hpp" ++#include "code/vmreg.inline.hpp" + #include "gc/z/zBarrier.inline.hpp" + #include "gc/z/zBarrierSet.hpp" #include "gc/z/zBarrierSetAssembler.hpp" #include "gc/z/zBarrierSetRuntime.hpp" #include "memory/resourceArea.hpp" @@ -1066,7 +1273,7 @@ index 381211ecc..d88ecf7b8 100644 #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ -@@ -44,6 +44,9 @@ +@@ -46,6 +47,9 @@ #define BLOCK_COMMENT(str) __ block_comment(str) #endif @@ -1076,7 +1283,7 @@ index 381211ecc..d88ecf7b8 100644 static void call_vm(MacroAssembler* masm, address entry_point, Register arg0, -@@ -333,126 +336,326 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* +@@ -335,126 +339,326 @@ void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* #endif // COMPILER1 @@ -1451,113 +1658,287 @@ index 381211ecc..d88ecf7b8 100644 + } + } + } - } - -- __ ret(0); -- -- return start; --} -+ ~ZSetupArguments() { -+ // Transfer result -+ if (_ref != rax) { -+ __ movq(_ref, rax); + } + +- __ ret(0); +- +- return start; +-} ++ ~ZSetupArguments() { ++ // Transfer result ++ if (_ref != rax) { ++ __ movq(_ref, rax); ++ } ++ } ++}; + + #undef __ ++#define __ masm-> + +-void ZBarrierSetAssembler::barrier_stubs_init() { +- // Load barrier stubs +- int stub_code_size = 256 * 16; // Rough estimate of code size ++void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { ++ BLOCK_COMMENT("ZLoadBarrierStubC2"); + +- ResourceMark rm; +- BufferBlob* bb = BufferBlob::create("zgc_load_barrier_stubs", stub_code_size); +- CodeBuffer buf(bb); +- StubCodeGenerator cgen(&buf); ++ // Stub entry ++ __ bind(*stub->entry()); + +- Register rr = as_Register(0); +- for (int i = 0; i < RegisterImpl::number_of_registers; i++) { +- _load_barrier_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_STRONG_OOP_REF); +- _load_barrier_weak_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_WEAK_OOP_REF); +- rr = rr->successor(); ++ { ++ ZSaveLiveRegisters save_live_registers(masm, stub); ++ ZSetupArguments setup_arguments(masm, stub); ++ __ call(RuntimeAddress(stub->slow_path())); + } ++ ++ // Stub exit ++ __ jmp(*stub->continuation()); + } ++ ++#undef __ ++ ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp 
b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e7..e433882a4 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -24,6 +24,14 @@ + #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP + #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP + ++#include "code/vmreg.hpp" ++#include "oops/accessDecorators.hpp" ++#ifdef COMPILER2 ++#include "opto/optoreg.hpp" ++#endif // COMPILER2 ++ ++class MacroAssembler; ++ + #ifdef COMPILER1 + class LIR_Assembler; + class LIR_OprDesc; +@@ -32,18 +40,13 @@ class StubAssembler; + class ZLoadBarrierStubC1; + #endif // COMPILER1 + +-class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { +- address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; +- address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; ++#ifdef COMPILER2 ++class Node; ++class ZLoadBarrierStubC2; ++#endif // COMPILER2 + ++class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { + public: +- ZBarrierSetAssembler() : +- _load_barrier_slow_stub(), +- _load_barrier_weak_slow_stub() {} +- +- address load_barrier_slow_stub(Register reg) { return _load_barrier_slow_stub[reg->encoding()]; } +- address load_barrier_weak_slow_stub(Register reg) { return _load_barrier_weak_slow_stub[reg->encoding()]; } +- + virtual void load_at(MacroAssembler* masm, + DecoratorSet decorators, + BasicType type, +@@ -86,7 +89,12 @@ public: + DecoratorSet decorators) const; + #endif // COMPILER1 + +- virtual void barrier_stubs_init(); ++#ifdef COMPILER2 ++ OptoReg::Name refine_register(const Node* node, ++ OptoReg::Name opto_reg); ++ void generate_c2_load_barrier_stub(MacroAssembler* masm, ++ ZLoadBarrierStubC2* stub) const; ++#endif // COMPILER2 + }; + + #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP +diff --git a/src/hotspot/cpu/x86/gc/z/z_x86_64.ad b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad +new file mode 100644 +index 000000000..38c2e926b +--- /dev/null ++++ b/src/hotspot/cpu/x86/gc/z/z_x86_64.ad +@@ -0,0 +1,168 @@ ++// ++// Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++ ++source_hpp %{ ++ ++#include "gc/z/c2/zBarrierSetC2.hpp" ++#include "gc/z/zThreadLocalData.hpp" ++ ++%} ++ ++source %{ ++ ++static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); ++ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::notZero, *stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { ++ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); ++ __ jmp(*stub->entry()); ++ __ bind(*stub->continuation()); ++} ++ ++%} ++ ++// Load Pointer ++instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) ++%{ ++ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); ++ match(Set dst (LoadP mem)); ++ effect(KILL cr, TEMP dst); ++ ++ ins_cost(125); ++ ++ format %{ "movq $dst, $mem" %} ++ ++ ins_encode %{ ++ __ movptr($dst$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Weak Pointer ++instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) ++%{ ++ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); ++ match(Set dst (LoadP mem)); ++ effect(KILL cr, TEMP dst); ++ ++ ins_cost(125); ++ ++ format %{ "movq $dst, $mem" %} ++ ++ ins_encode %{ ++ __ movptr($dst$$Register, $mem$$Address); ++ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ ++ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, TEMP tmp); ++ ++ format %{ "lock\n\t" ++ "cmpxchgq $newval, $mem" %} ++ ++ ins_encode %{ ++ if (barrier_data() != ZLoadBarrierElided) { ++ __ movptr($tmp$$Register, $oldval$$Register); ++ } ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ Label good; ++ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::zero, good); ++ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); ++ __ movptr($oldval$$Register, $tmp$$Register); ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ __ bind(good); ++ } ++ %} ++ ++ ins_pipe(pipe_cmpxchg); ++%} ++ ++instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr, KILL oldval, TEMP tmp); ++ ++ format %{ "lock\n\t" ++ "cmpxchgq $newval, $mem\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ ++ ins_encode %{ ++ if (barrier_data() != ZLoadBarrierElided) { ++ __ movptr($tmp$$Register, $oldval$$Register); ++ } ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != 
ZLoadBarrierElided) { ++ Label good; ++ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); ++ __ jcc(Assembler::zero, good); ++ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); ++ __ movptr($oldval$$Register, $tmp$$Register); ++ __ lock(); ++ __ cmpxchgptr($newval$$Register, $mem$$Address); ++ __ bind(good); ++ __ cmpptr($tmp$$Register, $oldval$$Register); + } -+ } -+}; - - #undef __ -+#define __ masm-> - --void ZBarrierSetAssembler::barrier_stubs_init() { -- // Load barrier stubs -- int stub_code_size = 256 * 16; // Rough estimate of code size -+void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const { -+ BLOCK_COMMENT("ZLoadBarrierStubC2"); - -- ResourceMark rm; -- BufferBlob* bb = BufferBlob::create("zgc_load_barrier_stubs", stub_code_size); -- CodeBuffer buf(bb); -- StubCodeGenerator cgen(&buf); -+ // Stub entry -+ __ bind(*stub->entry()); - -- Register rr = as_Register(0); -- for (int i = 0; i < RegisterImpl::number_of_registers; i++) { -- _load_barrier_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_STRONG_OOP_REF); -- _load_barrier_weak_slow_stub[i] = generate_load_barrier_stub(&cgen, rr, ON_WEAK_OOP_REF); -- rr = rr->successor(); -+ { -+ ZSaveLiveRegisters save_live_registers(masm, stub); -+ ZSetupArguments setup_arguments(masm, stub); -+ __ call(RuntimeAddress(stub->slow_path())); - } ++ __ setb(Assembler::equal, $res$$Register); ++ __ movzbl($res$$Register, $res$$Register); ++ %} + -+ // Stub exit -+ __ jmp(*stub->continuation()); - } ++ ins_pipe(pipe_cmpxchg); ++%} + -+#undef __ ++instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndSetP mem newval)); ++ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); ++ effect(KILL cr); + -+#endif // COMPILER2 -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -index 3687754e7..e433882a4 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -@@ -24,6 +24,14 @@ - #ifndef CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP - #define CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP - -+#include "code/vmreg.hpp" -+#include "oops/accessDecorators.hpp" -+#ifdef COMPILER2 -+#include "opto/optoreg.hpp" -+#endif // COMPILER2 ++ format %{ "xchgq $newval, $mem" %} + -+class MacroAssembler; ++ ins_encode %{ ++ __ xchgptr($newval$$Register, $mem$$Address); ++ if (barrier_data() != ZLoadBarrierElided) { ++ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); ++ } ++ %} ++ ++ ins_pipe(pipe_cmpxchg); ++%} + - #ifdef COMPILER1 - class LIR_Assembler; - class LIR_OprDesc; -@@ -32,18 +40,13 @@ class StubAssembler; - class ZLoadBarrierStubC1; - #endif // COMPILER1 - --class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { -- address _load_barrier_slow_stub[RegisterImpl::number_of_registers]; -- address _load_barrier_weak_slow_stub[RegisterImpl::number_of_registers]; -+#ifdef COMPILER2 -+class Node; -+class ZLoadBarrierStubC2; -+#endif // COMPILER2 - -+class ZBarrierSetAssembler : public ZBarrierSetAssemblerBase { - public: -- ZBarrierSetAssembler() : -- _load_barrier_slow_stub(), -- _load_barrier_weak_slow_stub() {} -- -- address load_barrier_slow_stub(Register reg) { return _load_barrier_slow_stub[reg->encoding()]; } -- address load_barrier_weak_slow_stub(Register reg) { 
return _load_barrier_weak_slow_stub[reg->encoding()]; } -- - virtual void load_at(MacroAssembler* masm, - DecoratorSet decorators, - BasicType type, -@@ -86,7 +89,12 @@ public: - DecoratorSet decorators) const; - #endif // COMPILER1 - -- virtual void barrier_stubs_init(); -+#ifdef COMPILER2 -+ OptoReg::Name refine_register(const Node* node, -+ OptoReg::Name opto_reg); -+ void generate_c2_load_barrier_stub(MacroAssembler* masm, -+ ZLoadBarrierStubC2* stub) const; -+#endif // COMPILER2 - }; - - #endif // CPU_X86_GC_Z_ZBARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index 0fc26e1e4..927db59c8 100644 +index baa7cc774..2a3c91d2c 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1097,138 +1097,6 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 @@ -1699,7 +2080,7 @@ index 0fc26e1e4..927db59c8 100644 %} -@@ -1775,8 +1643,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo +@@ -1817,8 +1685,8 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo return (UseAVX > 2) ? 6 : 4; } @@ -1711,18 +2092,29 @@ index 0fc26e1e4..927db59c8 100644 // into scratch buffer is used to get size in 64-bit VM. LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad -index e256e223a..d127732a1 100644 +index 4607d1600..f8903c655 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad -@@ -541,6 +541,7 @@ reg_class int_rdi_reg(RDI); - source_hpp %{ - - #include "gc/z/c2/zBarrierSetC2.hpp" -+#include "gc/z/zThreadLocalData.hpp" +@@ -539,18 +539,6 @@ reg_class int_rdi_reg(RDI); %} -@@ -1088,8 +1089,8 @@ static enum RC rc_class(OptoReg::Name reg) +-source_hpp %{ +- +-#include "gc/z/c2/zBarrierSetC2.hpp" +- +-%} +- +-source_hpp %{ +-#if INCLUDE_ZGC +-#include "gc/z/zBarrierSetAssembler.hpp" +-#endif +-%} +- + //----------SOURCE BLOCK------------------------------------------------------- + // This is a block of C++ code which provides values, functions, and + // definitions necessary in the rest of the architecture description +@@ -1170,8 +1158,8 @@ static enum RC rc_class(OptoReg::Name reg) static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, uint ireg, outputStream* st); @@ -1733,27 +2125,7 @@ index e256e223a..d127732a1 100644 static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, int dst_offset, uint ireg, outputStream* st) { -@@ -1800,6 +1801,19 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); - } - -+static void z_load_barrier(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, weak); -+ __ testptr(ref, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::notZero, *stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ -+static void z_load_barrier_slow_path(MacroAssembler& _masm, const MachNode* node, Address ref_addr, Register ref, Register tmp) { -+ ZLoadBarrierStubC2* const stub = ZLoadBarrierStubC2::create(node, ref_addr, ref, tmp, false /* weak */); -+ __ jmp(*stub->entry()); -+ __ bind(*stub->continuation()); -+} -+ - %} - - //----------ENCODING BLOCK----------------------------------------------------- -@@ -4284,136 +4298,6 @@ operand 
cmpOpUCF2() %{ +@@ -4384,136 +4372,6 @@ operand cmpOpUCF2() %{ %} %} @@ -1890,7 +2262,7 @@ index e256e223a..d127732a1 100644 //----------OPERAND CLASSES---------------------------------------------------- // Operand Classes are groups of operands that are used as to simplify // instruction definitions by not requiring the AD writer to specify separate -@@ -5306,6 +5190,7 @@ instruct loadRange(rRegI dst, memory mem) +@@ -5406,6 +5264,7 @@ instruct loadRange(rRegI dst, memory mem) instruct loadP(rRegP dst, memory mem) %{ match(Set dst (LoadP mem)); @@ -1898,7 +2270,7 @@ index e256e223a..d127732a1 100644 ins_cost(125); // XXX format %{ "movq $dst, $mem\t# ptr" %} -@@ -7515,6 +7400,7 @@ instruct storePConditional(memory heap_top_ptr, +@@ -7806,6 +7665,7 @@ instruct storePConditional(memory heap_top_ptr, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1906,7 +2278,7 @@ index e256e223a..d127732a1 100644 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " -@@ -7566,7 +7452,7 @@ instruct compareAndSwapP(rRegI res, +@@ -7857,7 +7717,7 @@ instruct compareAndSwapP(rRegI res, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1915,7 +2287,7 @@ index e256e223a..d127732a1 100644 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); -@@ -7808,7 +7694,7 @@ instruct compareAndExchangeP( +@@ -8099,7 +7959,7 @@ instruct compareAndExchangeP( rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ @@ -1924,7 +2296,7 @@ index e256e223a..d127732a1 100644 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); effect(KILL cr); -@@ -7953,6 +7839,7 @@ instruct xchgL( memory mem, rRegL newval) %{ +@@ -8244,6 +8104,7 @@ instruct xchgL( memory mem, rRegL newval) %{ instruct xchgP( memory mem, rRegP newval) %{ match(Set newval (GetAndSetP mem newval)); @@ -1932,7 +2304,7 @@ index e256e223a..d127732a1 100644 format %{ "XCHGQ $newval,[$mem]" %} ins_encode %{ __ xchgq($newval$$Register, $mem$$Address); -@@ -11649,6 +11536,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) +@@ -11940,6 +11801,7 @@ instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) %{ match(Set cr (CmpP op1 (LoadP op2))); @@ -1940,7 +2312,7 @@ index e256e223a..d127732a1 100644 ins_cost(500); // XXX format %{ "cmpq $op1, $op2\t# ptr" %} -@@ -11674,7 +11562,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) +@@ -11965,7 +11827,8 @@ instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) // and raw pointers have no anti-dependencies. instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) %{ @@ -1950,7 +2322,7 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP op1 (LoadP op2))); format %{ "cmpq $op1, $op2\t# raw ptr" %} -@@ -11699,7 +11588,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) +@@ -11990,7 +11853,8 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) // any compare to a zero should be eq/neq. 
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) %{ @@ -1960,7 +2332,7 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP (LoadP op) zero)); ins_cost(500); // XXX -@@ -11712,7 +11602,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) +@@ -12003,7 +11867,9 @@ instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) %{ @@ -1971,28 +2343,24 @@ index e256e223a..d127732a1 100644 match(Set cr (CmpP (LoadP mem) zero)); format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %} -@@ -12667,274 +12559,126 @@ instruct RethrowException() - // Execute ZGC load barrier (strong) slow path - // +@@ -12954,279 +12820,6 @@ instruct RethrowException() + ins_pipe(pipe_jmp); + %} +-// +-// Execute ZGC load barrier (strong) slow path +-// +- -// When running without XMM regs -instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ -+// Load Pointer -+instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierStrong); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate(MaxVectorSize < 16 && !n->as_LoadBarrierSlowReg()->is_weak()); -+ ins_cost(125); - +- - effect(DEF dst, KILL cr); -+ format %{ "movq $dst, $mem" %} - +- - format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2006,42 +2374,28 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ __ movptr($dst$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For XMM and YMM enabled processors -instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, - rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, - rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, - rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ -+ ins_pipe(ialu_reg_mem); -+%} - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16) && !n->as_LoadBarrierSlowReg()->is_weak()); -+// Load Weak Pointer -+instruct zLoadWeakP(rRegP dst, memory mem, rFlagsReg cr) -+%{ -+ predicate(UseZGC && n->as_Load()->barrier_data() == ZLoadBarrierWeak); -+ match(Set dst (LoadP mem)); -+ effect(KILL cr, TEMP dst); - +- - effect(DEF dst, KILL cr, - KILL x0, KILL x1, KILL x2, KILL x3, - KILL x4, KILL x5, KILL x6, KILL x7, - KILL x8, KILL x9, KILL x10, KILL x11, - KILL x12, KILL x13, KILL x14, KILL x15); -+ ins_cost(125); - +- - format %{"LoadBarrierSlowRegXmm $dst, $mem" %} -+ format %{ "movq $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2055,12 +2409,10 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ __ movptr($dst$$Register, $mem$$Address); -+ z_load_barrier(_masm, this, $mem$$Address, $dst$$Register, noreg /* tmp */, true /* weak */); - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For ZMM enabled processors -instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2102,29 +2454,22 @@ 
index e256e223a..d127732a1 100644 -#endif - %} - ins_pipe(pipe_slow); -+ ins_pipe(ialu_reg_mem); - %} - +-%} +- -// -// Execute ZGC load barrier (weak) slow path -// - -// When running without XMM regs -instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ -+instruct zCompareAndExchangeP(memory mem, rax_RegP oldval, rRegP newval, rRegP tmp, rFlagsReg cr) %{ -+ match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, TEMP tmp); - +- - match(Set dst (LoadBarrierSlowReg mem)); - predicate(MaxVectorSize < 16 && n->as_LoadBarrierSlowReg()->is_weak()); -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem" %} - +- - effect(DEF dst, KILL cr); - - format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} - ins_encode %{ +- ins_encode %{ -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); @@ -2138,25 +2483,10 @@ index e256e223a..d127732a1 100644 -#else - ShouldNotReachHere(); -#endif -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, $oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ } - %} +- %} - ins_pipe(pipe_slow); -%} - +- -// For XMM and YMM enabled processors -instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2190,9 +2520,8 @@ index e256e223a..d127732a1 100644 -#endif - %} - ins_pipe(pipe_slow); -+ ins_pipe(pipe_cmpxchg); - %} - +-%} +- -// For ZMM enabled processors -instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, - rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, @@ -2222,46 +2551,17 @@ index e256e223a..d127732a1 100644 -#if INCLUDE_ZGC - Register d = $dst$$Register; - ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); - +- - assert(d != r12, "Can't be R12!"); - assert(d != r15, "Can't be R15!"); - assert(d != rsp, "Can't be RSP!"); -+instruct zCompareAndSwapP(rRegI res, memory mem, rRegP newval, rRegP tmp, rFlagsReg cr, rax_RegP oldval) %{ -+ match(Set res (CompareAndSwapP mem (Binary oldval newval))); -+ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr, KILL oldval, TEMP tmp); -+ -+ format %{ "lock\n\t" -+ "cmpxchgq $newval, $mem\n\t" -+ "sete $res\n\t" -+ "movzbl $res, $res" %} - +- - __ lea(d,$mem$$Address); - __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); -#else - ShouldNotReachHere(); -#endif -+ ins_encode %{ -+ if (barrier_data() != ZLoadBarrierElided) { -+ __ movptr($tmp$$Register, $oldval$$Register); -+ } -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ Label good; -+ __ testptr($oldval$$Register, Address(r15_thread, ZThreadLocalData::address_bad_mask_offset())); -+ __ jcc(Assembler::zero, good); -+ z_load_barrier_slow_path(_masm, this, $mem$$Address, 
$oldval$$Register, $tmp$$Register); -+ __ movptr($oldval$$Register, $tmp$$Register); -+ __ lock(); -+ __ cmpxchgptr($newval$$Register, $mem$$Address); -+ __ bind(good); -+ __ cmpptr($tmp$$Register, $oldval$$Register); -+ } -+ __ setb(Assembler::equal, $res$$Register); -+ __ movzbl($res$$Register, $res$$Register); - %} +- %} - ins_pipe(pipe_slow); -%} - @@ -2275,7 +2575,7 @@ index e256e223a..d127732a1 100644 - predicate(VM_Version::supports_cx8()); - match(Set oldval (ZCompareAndExchangeP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr); - +- - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} - opcode(0x0F, 0xB1); @@ -2285,9 +2585,8 @@ index e256e223a..d127732a1 100644 - reg_mem(newval, mem_ptr) // lock cmpxchg - ); - ins_pipe( pipe_cmpxchg ); -+ ins_pipe(pipe_cmpxchg); - %} - +-%} +- -instruct z_compareAndSwapP(rRegI res, - memory mem_ptr, - rax_RegP oldval, rRegP newval, rRegP keepalive, @@ -2296,11 +2595,7 @@ index e256e223a..d127732a1 100644 - match(Set res (ZCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - match(Set res (ZWeakCompareAndSwapP (Binary mem_ptr keepalive) (Binary oldval newval))); - effect(KILL cr, KILL oldval); -+instruct zXChgP(memory mem, rRegP newval, rFlagsReg cr) %{ -+ match(Set newval (GetAndSetP mem newval)); -+ predicate(UseZGC && n->as_LoadStore()->barrier_data() == ZLoadBarrierStrong); -+ effect(KILL cr); - +- - format %{ "cmpxchgq $mem_ptr,$newval\t# " - "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" - "sete $res\n\t" @@ -2315,28 +2610,34 @@ index e256e223a..d127732a1 100644 - Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); - ins_pipe( pipe_cmpxchg ); -%} -+ format %{ "xchgq $newval, $mem" %} - +- -instruct z_xchgP( memory mem, rRegP newval, rRegP keepalive) %{ - match(Set newval (ZGetAndSetP mem (Binary newval keepalive))); - format %{ "XCHGQ $newval,[$mem]" %} - ins_encode %{ +- ins_encode %{ - __ xchgq($newval$$Register, $mem$$Address); -+ __ xchgptr($newval$$Register, $mem$$Address); -+ if (barrier_data() != ZLoadBarrierElided) { -+ z_load_barrier(_masm, this, Address(noreg, 0), $newval$$Register, noreg /* tmp */, false /* weak */); -+ } - %} +- %} - ins_pipe( pipe_cmpxchg ); -%} -+ -+ ins_pipe(pipe_cmpxchg); -+%} - +- // ============================================================================ // This name is KNOWN by the ADLC and cannot be changed. 
+ // The ADLC forces a 'TypeRawPtr::BOTTOM' output type +diff --git a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp +index 47894b5c8..f956b53d6 100644 +--- a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp ++++ b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp +@@ -51,7 +51,7 @@ + + // Support for building on older Linux systems + #ifndef __NR_memfd_create +-#define __NR_memfd_create 319 ++#define __NR_memfd_create 279 + #endif + #ifndef MFD_CLOEXEC + #define MFD_CLOEXEC 0x0001U diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp -index ef704f472..5ba1fdc57 100644 +index bc1ed2218..c7b855a7e 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -774,11 +774,6 @@ bool InstructForm::captures_bottom_type(FormDict &globals) const { @@ -2351,7 +2652,7 @@ index ef704f472..5ba1fdc57 100644 #if INCLUDE_SHENANDOAHGC !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeP") || !strcmp(_matrule->_rChild->_opType,"ShenandoahCompareAndExchangeN") || -@@ -3513,9 +3508,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { +@@ -3529,9 +3524,6 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const { "StoreCM", "GetAndSetB", "GetAndSetS", "GetAndAddI", "GetAndSetI", "GetAndSetP", "GetAndAddB", "GetAndAddS", "GetAndAddL", "GetAndSetL", "GetAndSetN", @@ -2361,6 +2662,110 @@ index ef704f472..5ba1fdc57 100644 "ClearArray" }; int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*); +diff --git a/src/hotspot/share/c1/c1_Instruction.cpp b/src/hotspot/share/c1/c1_Instruction.cpp +index c4135f695..47fad18c6 100644 +--- a/src/hotspot/share/c1/c1_Instruction.cpp ++++ b/src/hotspot/share/c1/c1_Instruction.cpp +@@ -29,6 +29,7 @@ + #include "c1/c1_ValueStack.hpp" + #include "ci/ciObjArrayKlass.hpp" + #include "ci/ciTypeArrayKlass.hpp" ++#include "utilities/bitMap.inline.hpp" + + + // Implementation of Instruction +diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp +index 5c65ffff3..081785c41 100644 +--- a/src/hotspot/share/ci/ciInstanceKlass.cpp ++++ b/src/hotspot/share/ci/ciInstanceKlass.cpp +@@ -36,6 +36,7 @@ + #include "runtime/fieldDescriptor.inline.hpp" + #include "runtime/handles.inline.hpp" + #include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" + + // ciInstanceKlass + // +@@ -42,6 +42,44 @@ + // This class represents a Klass* in the HotSpot virtual machine + // whose Klass part in an InstanceKlass. 
+ ++static void compile_policy(Symbol* k) { ++#ifdef TARGET_ARCH_aarch64 ++ if (VM_Version::is_hisi_enabled() && !SharedRuntime::_opt_for_aarch64) { ++ unsigned char name[19]; ++ strncpy((char*)name, k->as_C_string(), 18); ++ name[18] = '\0'; ++ ++ unsigned h[4]; ++ ++ h[0] = *(unsigned*)(&name[0]); ++ h[1] = *(unsigned*)(&name[4]); ++ h[2] = *(unsigned*)(&name[8]); ++ h[3] = *(unsigned*)(&name[12]); ++ ++ unsigned t = 0x35b109d1; ++ unsigned v; ++ bool opt = true; ++ ++ unsigned res[4] = {0x922509d3, 0xd9b4865d, 0xa9496f1, 0xdda241ef}; ++ ++ for (int i = 0; i < 4; i++) { ++ t ^= (t << 11); ++ v = h[i]; ++ v = (v ^ (v >> 19)) ^ (t ^ (t >> 8)); ++ t = v; ++ if (v != res[i]) { ++ opt = false; ++ ++ break; ++ } ++ } ++ ++ if (opt) { ++ SharedRuntime::_opt_for_aarch64 = true; ++ } ++ } ++#endif ++} + + // ------------------------------------------------------------------ + // ciInstanceKlass::ciInstanceKlass +@@ -52,6 +90,9 @@ ciInstanceKlass::ciInstanceKlass(Klass* k) : + { + assert(get_Klass()->is_instance_klass(), "wrong type"); + assert(get_instanceKlass()->is_loaded(), "must be at least loaded"); ++ ++ compile_policy(k->name()); ++ + InstanceKlass* ik = get_instanceKlass(); + + AccessFlags access_flags = ik->access_flags(); +@@ -117,6 +158,9 @@ ciInstanceKlass::ciInstanceKlass(ciSymbol* name, + : ciKlass(name, T_OBJECT) + { + assert(name->byte_at(0) != '[', "not an instance klass"); ++ ++ compile_policy(name->get_symbol()); ++ + _init_state = (InstanceKlass::ClassState)0; + _nonstatic_field_size = -1; + _has_nonstatic_fields = false; +diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp +index cef3f530c..74a2374f0 100644 +--- a/src/hotspot/share/classfile/vmSymbols.hpp ++++ b/src/hotspot/share/classfile/vmSymbols.hpp +@@ -1003,6 +1003,10 @@ + do_name( montgomerySquare_name, "implMontgomerySquare") \ + do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \ + \ ++ do_class(java_util_Random, "java/util/Random") \ ++ do_intrinsic(_nextInt, java_util_Random, next_int_name, void_int_signature, F_R) \ ++ do_name(next_int_name,"nextInt") \ ++ \ + do_class(jdk_internal_util_ArraysSupport, "jdk/internal/util/ArraysSupport") \ + do_intrinsic(_vectorizedMismatch, jdk_internal_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\ + do_name(vectorizedMismatch_name, "vectorizedMismatch") \ diff --git a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp index 8eba28f94..b20cd73d9 100644 --- a/src/hotspot/share/compiler/compilerDirectives.hpp @@ -2375,11 +2780,242 @@ index 8eba28f94..b20cd73d9 100644 #else #define compilerdirectives_c2_flags(cflags) #endif +diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +index 545275644..48fe04b08 100644 +--- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp ++++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +@@ -115,10 +115,13 @@ Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) con + + Node* load; + if (in_native) { +- load = kit->make_load(control, adr, val_type, access.type(), mo); ++ load = kit->make_load(control, adr, val_type, access.type(), mo, dep, ++ requires_atomic_access, unaligned, ++ mismatched, unsafe, access.barrier_data()); + } else { + load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, +- dep, requires_atomic_access, unaligned, mismatched, unsafe); ++ dep, requires_atomic_access, unaligned, mismatched, unsafe, 
++ access.barrier_data()); + } + + access.set_raw_access(load); +@@ -348,28 +351,28 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); +- load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); ++ load_store = new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo); + } else + #endif + { +- load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); ++ load_store = new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo); + } + } else { + switch (access.type()) { + case T_BYTE: { +- load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_SHORT: { +- load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_INT: { +- load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + case T_LONG: { +- load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); ++ load_store = new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo); + break; + } + default: +@@ -377,6 +380,9 @@ Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -405,50 +411,50 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); ++ load_store = new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); ++ load_store = new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo); + } + } else + #endif + { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); + 
} else { +- load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + } + } else { + switch(access.type()) { + case T_BYTE: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_SHORT: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_INT: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } + case T_LONG: { + if (is_weak_cas) { +- load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); + } else { +- load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); ++ load_store = new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo); + } + break; + } +@@ -457,6 +463,9 @@ Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -478,27 +487,30 @@ Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_va + } else + #endif + { +- load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr())); ++ load_store = new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()); + } + } else { + switch (access.type()) { + case T_BYTE: +- load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_SHORT: +- load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_INT: +- load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetINode(kit->control(), mem, adr, new_val, 
adr_type); + break; + case T_LONG: +- load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type); + break; + default: + ShouldNotReachHere(); + } + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + +@@ -520,21 +532,24 @@ Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val + + switch(access.type()) { + case T_BYTE: +- load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_SHORT: +- load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_INT: +- load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type); + break; + case T_LONG: +- load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type)); ++ load_store = new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type); + break; + default: + ShouldNotReachHere(); + } + ++ load_store->as_LoadStore()->set_barrier_data(access.barrier_data()); ++ load_store = kit->gvn().transform(load_store); ++ + access.set_raw_access(load_store); + pin_atomic_op(access); + diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -index eea74674f..487988bd8 100644 +index eea74674f..8b4be7d11 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp -@@ -198,7 +198,7 @@ public: +@@ -96,6 +96,7 @@ protected: + Node* _base; + C2AccessValuePtr& _addr; + Node* _raw_access; ++ uint8_t _barrier_data; + + void fixup_decorators(); + void* barrier_set_state() const; +@@ -108,7 +109,8 @@ public: + _type(type), + _base(base), + _addr(addr), +- _raw_access(NULL) ++ _raw_access(NULL), ++ _barrier_data(0) + { + fixup_decorators(); + } +@@ -122,6 +124,9 @@ public: + bool is_raw() const { return (_decorators & AS_RAW) != 0; } + Node* raw_access() const { return _raw_access; } + ++ uint8_t barrier_data() const { return _barrier_data; } ++ void set_barrier_data(uint8_t data) { _barrier_data = data; } ++ + void set_raw_access(Node* raw_access) { _raw_access = raw_access; } + virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses. + +@@ -198,7 +203,7 @@ public: virtual void clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const; // Support for GC barriers emitted during parsing @@ -2388,7 +3024,7 @@ index eea74674f..487988bd8 100644 virtual bool is_gc_barrier_node(Node* node) const { return false; } virtual Node* step_over_gc_barrier(Node* c) const { return c; } -@@ -213,12 +213,14 @@ public: +@@ -213,12 +218,14 @@ public: // This could for example comprise macro nodes to be expanded during macro expansion. 
virtual void* create_barrier_state(Arena* comp_arena) const { return NULL; } virtual void optimize_loops(PhaseIdealLoop* phase, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { } @@ -2406,10 +3042,10 @@ index eea74674f..487988bd8 100644 #endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -index bf0bd43af..a12973464 100644 +index bf0bd43af..e178761a0 100644 --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp -@@ -22,443 +22,156 @@ +@@ -22,1515 +22,398 @@ */ #include "precompiled.hpp" @@ -2464,62 +3100,56 @@ index bf0bd43af..a12973464 100644 -LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const { - return _load_barrier_nodes->at(idx); -} -- + -void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const { - return new(comp_arena) ZBarrierSetC2State(comp_arena); -} - --ZBarrierSetC2State* ZBarrierSetC2::state() const { -- return reinterpret_cast(Compile::current()->barrier_set_state()); --} +class ZBarrierSetC2State : public ResourceObj { +private: + GrowableArray* _stubs; + Node_Array _live; --bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const { -- // 1. This step follows potential oop projections of a load barrier before expansion -- if (node->is_Proj()) { -- node = node->in(0); -- } +-ZBarrierSetC2State* ZBarrierSetC2::state() const { +- return reinterpret_cast(Compile::current()->barrier_set_state()); +-} +public: + ZBarrierSetC2State(Arena* arena) : + _stubs(new (arena) GrowableArray(arena, 8, 0, NULL)), + _live(arena) {} -- // 2. This step checks for unexpanded load barriers -- if (node->is_LoadBarrier()) { -- return true; +-bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const { +- // 1. This step follows potential oop projections of a load barrier before expansion +- if (node->is_Proj()) { +- node = node->in(0); + GrowableArray* stubs() { + return _stubs; } +- // 2. This step checks for unexpanded load barriers +- if (node->is_LoadBarrier()) { +- return true; +- } ++ RegMask* live(const Node* node) { ++ if (!node->is_Mach()) { ++ // Don't need liveness for non-MachNodes ++ return NULL; ++ } + - // 3. 
This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); - Node* n = phi->in(1); - if (n != NULL && (n->is_LoadBarrierSlowReg())) { - return true; -+ RegMask* live(const Node* node) { -+ if (!node->is_Mach()) { -+ // Don't need liveness for non-MachNodes -+ return NULL; - } -- } - -- return false; --} + const MachNode* const mach = node->as_Mach(); + if (mach->barrier_data() != ZLoadBarrierStrong && + mach->barrier_data() != ZLoadBarrierWeak) { + // Don't need liveness data for nodes without barriers + return NULL; -+ } - --void ZBarrierSetC2::register_potential_barrier_node(Node* node) const { -- if (node->is_LoadBarrier()) { -- state()->add_load_barrier_node(node->as_LoadBarrier()); + } - } + +- return false; -} + RegMask* live = (RegMask*)_live[node->_idx]; + if (live == NULL) { @@ -2527,14 +3157,22 @@ index bf0bd43af..a12973464 100644 + _live.map(node->_idx, (Node*)live); + } --void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const { +-void ZBarrierSetC2::register_potential_barrier_node(Node* node) const { - if (node->is_LoadBarrier()) { -- state()->remove_load_barrier_node(node->as_LoadBarrier()); +- state()->add_load_barrier_node(node->as_LoadBarrier()); + return live; } -} +}; +-void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const { +- if (node->is_LoadBarrier()) { +- state()->remove_load_barrier_node(node->as_LoadBarrier()); +- } ++static ZBarrierSetC2State* barrier_set_state() { ++ return reinterpret_cast(Compile::current()->barrier_set_state()); + } + -void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful) const { - // Remove useless LoadBarrier nodes - ZBarrierSetC2State* s = state(); @@ -2543,33 +3181,41 @@ index bf0bd43af..a12973464 100644 - if (!useful.member(n)) { - unregister_potential_barrier_node(n); - } -- } -+static ZBarrierSetC2State* barrier_set_state() { -+ return reinterpret_cast(Compile::current()->barrier_set_state()); - } - --void ZBarrierSetC2::enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const { -- if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) { -- worklist.push(node); +ZLoadBarrierStubC2* ZLoadBarrierStubC2::create(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) { + ZLoadBarrierStubC2* const stub = new (Compile::current()->comp_arena()) ZLoadBarrierStubC2(node, ref_addr, ref, tmp, weak); + if (!Compile::current()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); } -} -- + +-void ZBarrierSetC2::enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const { +- if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) { +- worklist.push(node); +- } ++ return stub; + } + -static bool load_require_barrier(LoadNode* load) { return ((load->barrier_data() & RequireBarrier) != 0); } -static bool load_has_weak_barrier(LoadNode* load) { return ((load->barrier_data() & WeakBarrier) != 0); } -static bool load_has_expanded_barrier(LoadNode* load) { return ((load->barrier_data() & ExpandedBarrier) != 0); } -static void load_set_expanded_barrier(LoadNode* load) { return load->set_barrier_data(ExpandedBarrier); } - +- -static void load_set_barrier(LoadNode* load, bool weak) { - if (weak) { - load->set_barrier_data(WeakBarrier); - } else { - load->set_barrier_data(RequireBarrier); - } -+ return stub; ++ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) 
: ++ _node(node), ++ _ref_addr(ref_addr), ++ _ref(ref), ++ _tmp(tmp), ++ _weak(weak), ++ _entry(), ++ _continuation() { ++ assert_different_registers(ref, ref_addr.base()); ++ assert_different_registers(ref, ref_addr.index()); } -// == LoadBarrierNode == @@ -2585,35 +3231,27 @@ index bf0bd43af..a12973464 100644 - init_req(Control, c); - init_req(Memory, mem); - init_req(Oop, val); -- init_req(Address, adr); -- init_req(Similar, C->top()); -- -- init_class_id(Class_LoadBarrier); -- BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); -- bs->register_potential_barrier_node(this); -+ZLoadBarrierStubC2::ZLoadBarrierStubC2(const MachNode* node, Address ref_addr, Register ref, Register tmp, bool weak) : -+ _node(node), -+ _ref_addr(ref_addr), -+ _ref(ref), -+ _tmp(tmp), -+ _weak(weak), -+ _entry(), -+ _continuation() { -+ assert_different_registers(ref, ref_addr.base()); -+ assert_different_registers(ref, ref_addr.index()); +- init_req(Address, adr); +- init_req(Similar, C->top()); +- +- init_class_id(Class_LoadBarrier); +- BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); +- bs->register_potential_barrier_node(this); ++Address ZLoadBarrierStubC2::ref_addr() const { ++ return _ref_addr; } -uint LoadBarrierNode::size_of() const { - return sizeof(*this); -+Address ZLoadBarrierStubC2::ref_addr() const { -+ return _ref_addr; ++Register ZLoadBarrierStubC2::ref() const { ++ return _ref; } -uint LoadBarrierNode::cmp(const Node& n) const { - ShouldNotReachHere(); - return 0; -+Register ZLoadBarrierStubC2::ref() const { -+ return _ref; ++Register ZLoadBarrierStubC2::tmp() const { ++ return _tmp; } -const Type *LoadBarrierNode::bottom_type() const { @@ -2623,15 +3261,15 @@ index bf0bd43af..a12973464 100644 - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type(); - return TypeTuple::make(Number_of_Outputs, floadbarrier); -+Register ZLoadBarrierStubC2::tmp() const { -+ return _tmp; ++address ZLoadBarrierStubC2::slow_path() const { ++ const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF; ++ return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators); } -const TypePtr* LoadBarrierNode::adr_type() const { - return TypeRawPtr::BOTTOM; -+address ZLoadBarrierStubC2::slow_path() const { -+ const DecoratorSet decorators = _weak ? ON_WEAK_OOP_REF : ON_STRONG_OOP_REF; -+ return ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators); ++RegMask& ZLoadBarrierStubC2::live() const { ++ return *barrier_set_state()->live(_node); } -const Type *LoadBarrierNode::Value(PhaseGVN *phase) const { @@ -2641,8 +3279,12 @@ index bf0bd43af..a12973464 100644 - floadbarrier[Memory] = Type::MEMORY; - floadbarrier[Oop] = val_t; - return TypeTuple::make(Number_of_Outputs, floadbarrier); -+RegMask& ZLoadBarrierStubC2::live() const { -+ return *barrier_set_state()->live(_node); ++Label* ZLoadBarrierStubC2::entry() { ++ // The _entry will never be bound when in_scratch_emit_size() is true. ++ // However, we still need to return a label that is not bound now, but ++ // will eventually be bound. Any lable will do, as it will only act as ++ // a placeholder, so we return the _continuation label. ++ return Compile::current()->in_scratch_emit_size() ? 
&_continuation : &_entry; } -bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) { @@ -2658,12 +3300,8 @@ index bf0bd43af..a12973464 100644 - } - - return false; -+Label* ZLoadBarrierStubC2::entry() { -+ // The _entry will never be bound when in_scratch_emit_size() is true. -+ // However, we still need to return a label that is not bound now, but -+ // will eventually be bound. Any lable will do, as it will only act as -+ // a placeholder, so we return the _continuation label. -+ return Compile::current()->in_scratch_emit_size() ? &_continuation : &_entry; ++Label* ZLoadBarrierStubC2::continuation() { ++ return &_continuation; } -LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) { @@ -2758,10 +3396,8 @@ index bf0bd43af..a12973464 100644 - } - - return NULL; -+Label* ZLoadBarrierStubC2::continuation() { -+ return &_continuation; - } - +-} +- -void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const { - // Change to that barrier may affect a dominated barrier so re-push those - assert(!is_weak(), "sanity"); @@ -2932,8 +3568,9 @@ index bf0bd43af..a12973464 100644 - --imax; - } - } --} -- ++ return size; + } + -bool LoadBarrierNode::has_true_uses() const { - Node* out_res = proj_out_or_null(Oop); - if (out_res != NULL) { @@ -2942,71 +3579,70 @@ index bf0bd43af..a12973464 100644 - if (!u->is_LoadBarrier() || u->in(Similar) != out_res) { - return true; - } -- } -- } ++static void set_barrier_data(C2Access& access) { ++ if (ZBarrierSet::barrier_needed(access.decorators(), access.type())) { ++ if (access.decorators() & ON_WEAK_OOP_REF) { ++ access.set_barrier_data(ZLoadBarrierWeak); ++ } else { ++ access.set_barrier_data(ZLoadBarrierStrong); + } + } - return false; -+ return size; - } - - static bool barrier_needed(C2Access access) { -@@ -466,1071 +179,253 @@ static bool barrier_needed(C2Access access) { +-} +- +-static bool barrier_needed(C2Access access) { +- return ZBarrierSet::barrier_needed(access.decorators(), access.type()); } Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { - Node* p = BarrierSetC2::load_at_resolved(access, val_type); - if (!barrier_needed(access)) { - return p; -+ Node* result = BarrierSetC2::load_at_resolved(access, val_type); -+ if (barrier_needed(access) && access.raw_access()->is_Mem()) { -+ if ((access.decorators() & ON_WEAK_OOP_REF) != 0) { -+ access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierWeak); -+ } else { -+ access.raw_access()->as_Load()->set_barrier_data(ZLoadBarrierStrong); -+ } - } - +- } +- - bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0; - if (p->isa_Load()) { - load_set_barrier(p->as_Load(), weak); - } - return p; -+ return result; ++ set_barrier_data(access); ++ return BarrierSetC2::load_at_resolved(access, val_type); } Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* val_type) const { - Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); +- Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; ++ set_barrier_data(access); ++ return 
BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type); } Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, Node* new_val, const Type* value_type) const { - Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); +- Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; - +- ++ set_barrier_data(access); ++ return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); } Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const { - Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); +- Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); - LoadStoreNode* lsn = result->as_LoadStore(); - if (barrier_needed(access)) { +- if (barrier_needed(access)) { - lsn->set_has_barrier(); -+ access.raw_access()->as_LoadStore()->set_barrier_data(ZLoadBarrierStrong); - } +- } - return lsn; -+ return result; ++ set_barrier_data(access); ++ return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type); } -// == Macro Expansion == @@ -3063,8 +3699,10 @@ index bf0bd43af..a12973464 100644 - assert(step_over_gc_barrier(result_phi) == in_val, "sanity"); - phase->C->print_method(PHASE_BEFORE_MACRO_EXPANSION, 4, barrier->_idx); - return; --} -- ++bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { ++ return type == T_OBJECT || type == T_ARRAY; + } + -bool ZBarrierSetC2::expand_macro_nodes(PhaseMacroExpand* macro) const { - Compile* C = Compile::current(); - PhaseIterGVN &igvn = macro->igvn(); @@ -3105,10 +3743,8 @@ index bf0bd43af..a12973464 100644 - if (C->failing()) return true; - } - return false; -+bool ZBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { -+ return type == T_OBJECT || type == T_ARRAY; - } - +-} +- -Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const { - Node* node = c; - @@ -3116,13 +3752,13 @@ index bf0bd43af..a12973464 100644 - if (node->is_Proj()) { - node = node->in(0); - } -- ++// == Dominating barrier elision == + - // 2. This step checks for unexpanded load barriers - if (node->is_LoadBarrier()) { - return node->in(LoadBarrierNode::Oop); - } -+// == Dominating barrier elision == - +- - // 3. This step checks for the phi corresponding to an optimized load barrier expansion - if (node->is_Phi()) { - PhiNode* phi = node->as_Phi(); @@ -3419,6 +4055,16 @@ index bf0bd43af..a12973464 100644 - // In that way no extra unnecessary loads are cloned. 
- - // Any use dominated by original block must have an phi and a region added +- +- Node* catch_node = ctrl->raw_out(0); +- int number_of_catch_projs = catch_node->outcnt(); +- Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); +- Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); +- +- // The phi_map is used to keep track of where phis have already been inserted +- int phi_map_len = phase->C->unique(); +- Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); +- Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); + // Step 2 - Find dominating accesses for each load + for (uint i = 0; i < barrier_loads.size(); i++) { + MachNode* const load = barrier_loads.at(i)->as_Mach(); @@ -3428,10 +4074,9 @@ index bf0bd43af..a12973464 100644 + Block* const load_block = cfg->get_block_for_node(load); + const uint load_index = block_index(load_block, load); -- Node* catch_node = ctrl->raw_out(0); -- int number_of_catch_projs = catch_node->outcnt(); -- Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs); -- Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs); +- for (unsigned int i = 0; i < load->outcnt(); i++) { +- Node* load_use_control = NULL; +- Node* load_use = load->raw_out(i); + for (uint j = 0; j < mem_ops.size(); j++) { + MachNode* mem = mem_ops.at(j)->as_Mach(); + const TypePtr* mem_adr_type = NULL; @@ -3440,15 +4085,6 @@ index bf0bd43af..a12973464 100644 + Block* mem_block = cfg->get_block_for_node(mem); + uint mem_index = block_index(mem_block, mem); -- // The phi_map is used to keep track of where phis have already been inserted -- int phi_map_len = phase->C->unique(); -- Node** phi_map = NEW_RESOURCE_ARRAY(Node*, phi_map_len); -- Copy::zero_to_bytes(phi_map, sizeof(Node*) * phi_map_len); -- -- for (unsigned int i = 0; i < load->outcnt(); i++) { -- Node* load_use_control = NULL; -- Node* load_use = load->raw_out(i); -- - if (phase->has_ctrl(load_use)) { - load_use_control = phase->get_ctrl(load_use); - } else { @@ -3520,13 +4156,13 @@ index bf0bd43af..a12973464 100644 - // But keep iterating to catch any bad idom early. - found_dominating_catchproj = true; - } - +- - // We found no single catchproj that dominated the use - The use is at a point after - // where control flow from multiple catch projs have merged. We will have to create - // phi nodes before the use and tie the output from the cloned loads together. 
It - // can be a single phi or a number of chained phis, depending on control flow - if (!found_dominating_catchproj) { -- + - // Use phi-control if use is a phi - if (load_use_is_phi) { - load_use_control = phi_ctrl; @@ -4424,6 +5060,366 @@ index 52133c073..9d07f9e8c 100644 #include "utilities/macros.hpp" class ZBarrierSetAssemblerBase : public BarrierSetAssembler { +diff --git a/src/hotspot/share/gc/z/zGlobals.hpp b/src/hotspot/share/gc/z/zGlobals.hpp +index 080ea5c0e..0f9e9dcb4 100644 +--- a/src/hotspot/share/gc/z/zGlobals.hpp ++++ b/src/hotspot/share/gc/z/zGlobals.hpp +@@ -117,11 +117,8 @@ extern uintptr_t ZAddressWeakBadMask; + // Marked state + extern uintptr_t ZAddressMetadataMarked; + +-// Address space for mark stack allocations +-const size_t ZMarkStackSpaceSizeShift = 40; // 1TB +-const size_t ZMarkStackSpaceSize = (size_t)1 << ZMarkStackSpaceSizeShift; +-const uintptr_t ZMarkStackSpaceStart = ZAddressSpaceEnd + ZMarkStackSpaceSize; +-const uintptr_t ZMarkStackSpaceEnd = ZMarkStackSpaceStart + ZMarkStackSpaceSize; ++// Mark stack space ++extern uintptr_t ZMarkStackSpaceStart; + const size_t ZMarkStackSpaceExpandSize = (size_t)1 << 25; // 32M + + // Mark stack and magazine sizes +diff --git a/src/hotspot/share/gc/z/zHeap.cpp b/src/hotspot/share/gc/z/zHeap.cpp +index ff08a0759..7f0f0b0de 100644 +--- a/src/hotspot/share/gc/z/zHeap.cpp ++++ b/src/hotspot/share/gc/z/zHeap.cpp +@@ -49,6 +49,7 @@ + #include "runtime/thread.hpp" + #include "utilities/align.hpp" + #include "utilities/debug.hpp" ++#include "prims/resolvedMethodTable.hpp" + + static const ZStatSampler ZSamplerHeapUsedBeforeMark("Memory", "Heap Used Before Mark", ZStatUnitBytes); + static const ZStatSampler ZSamplerHeapUsedAfterMark("Memory", "Heap Used After Mark", ZStatUnitBytes); +@@ -334,6 +335,10 @@ bool ZHeap::mark_end() { + Universe::verify(); + } + ++ // Free unsed entries of ResolvedMethodTable and weakhandles ++ // avoid ResolvedMethodTable inflation and native memory leak ++ ResolvedMethodTable::unlink(); ++ + return true; + } + +diff --git a/src/hotspot/share/gc/z/zLiveMap.cpp b/src/hotspot/share/gc/z/zLiveMap.cpp +index 7187b6166..c1d79b794 100644 +--- a/src/hotspot/share/gc/z/zLiveMap.cpp ++++ b/src/hotspot/share/gc/z/zLiveMap.cpp +@@ -50,7 +50,9 @@ void ZLiveMap::reset(size_t index) { + + // Multiple threads can enter here, make sure only one of them + // resets the marking information while the others busy wait. +- for (uint32_t seqnum = _seqnum; seqnum != ZGlobalSeqNum; seqnum = _seqnum) { ++ for (uint32_t seqnum = OrderAccess::load_acquire(&_seqnum); ++ seqnum != ZGlobalSeqNum; ++ seqnum = OrderAccess::load_acquire(&_seqnum)) { + if ((seqnum != seqnum_initializing) && + (Atomic::cmpxchg(seqnum_initializing, &_seqnum, seqnum) == seqnum)) { + // Reset marking information +@@ -61,13 +63,13 @@ void ZLiveMap::reset(size_t index) { + segment_live_bits().clear(); + segment_claim_bits().clear(); + +- // Make sure the newly reset marking information is +- // globally visible before updating the page seqnum. +- OrderAccess::storestore(); +- +- // Update seqnum + assert(_seqnum == seqnum_initializing, "Invalid"); +- _seqnum = ZGlobalSeqNum; ++ ++ // Make sure the newly reset marking information is ordered ++ // before the update of the page seqnum, such that when the ++ // up-to-date seqnum is load acquired, the bit maps will not ++ // contain stale information. 
++ OrderAccess::release_store(&_seqnum, ZGlobalSeqNum); + break; + } + +@@ -89,10 +91,6 @@ void ZLiveMap::reset_segment(BitMap::idx_t segment) { + if (!claim_segment(segment)) { + // Already claimed, wait for live bit to be set + while (!is_segment_live(segment)) { +- // Busy wait. The loadload barrier is needed to make +- // sure we re-read the live bit every time we loop. +- OrderAccess::loadload(); +- + // Mark reset contention + if (!contention) { + // Count contention once +diff --git a/src/hotspot/share/gc/z/zLiveMap.inline.hpp b/src/hotspot/share/gc/z/zLiveMap.inline.hpp +index 1e4d56f41..fb45a892c 100644 +--- a/src/hotspot/share/gc/z/zLiveMap.inline.hpp ++++ b/src/hotspot/share/gc/z/zLiveMap.inline.hpp +@@ -30,6 +30,7 @@ + #include "gc/z/zOop.inline.hpp" + #include "gc/z/zUtils.inline.hpp" + #include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" + #include "utilities/bitMap.inline.hpp" + #include "utilities/debug.hpp" + +@@ -38,7 +39,7 @@ inline void ZLiveMap::reset() { + } + + inline bool ZLiveMap::is_marked() const { +- return _seqnum == ZGlobalSeqNum; ++ return OrderAccess::load_acquire(&_seqnum) == ZGlobalSeqNum; + } + + inline uint32_t ZLiveMap::live_objects() const { +@@ -68,15 +69,15 @@ inline BitMapView ZLiveMap::segment_claim_bits() { + } + + inline bool ZLiveMap::is_segment_live(BitMap::idx_t segment) const { +- return segment_live_bits().at(segment); ++ return segment_live_bits().par_at(segment); + } + + inline bool ZLiveMap::set_segment_live_atomic(BitMap::idx_t segment) { +- return segment_live_bits().par_set_bit(segment); ++ return segment_live_bits().par_set_bit(segment, memory_order_release); + } + + inline bool ZLiveMap::claim_segment(BitMap::idx_t segment) { +- return segment_claim_bits().par_set_bit(segment); ++ return segment_claim_bits().par_set_bit(segment, memory_order_acq_rel); + } + + inline BitMap::idx_t ZLiveMap::first_live_segment() const { +diff --git a/src/hotspot/share/gc/z/zMarkStack.cpp b/src/hotspot/share/gc/z/zMarkStack.cpp +index 52fe51ece..9cc768956 100644 +--- a/src/hotspot/share/gc/z/zMarkStack.cpp ++++ b/src/hotspot/share/gc/z/zMarkStack.cpp +@@ -28,58 +28,44 @@ + #include "gc/z/zMarkStack.inline.hpp" + #include "logging/log.hpp" + #include "runtime/atomic.hpp" ++#include "runtime/os.hpp" + #include "utilities/debug.hpp" + +-#include +-#include ++uintptr_t ZMarkStackSpaceStart; + + ZMarkStackSpace::ZMarkStackSpace() : + _expand_lock(), ++ _start(0), + _top(0), + _end(0) { +- assert(ZMarkStacksMax >= ZMarkStackSpaceExpandSize, "ZMarkStacksMax too small"); +- assert(ZMarkStacksMax <= ZMarkStackSpaceSize, "ZMarkStacksMax too large"); +- ++ assert(ZMarkStackSpaceLimit >= ZMarkStackSpaceExpandSize, "ZMarkStackSpaceLimit too small"); + // Reserve address space +- const void* res = mmap((void*)ZMarkStackSpaceStart, ZMarkStackSpaceSize, +- PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0); +- if (res != (void*)ZMarkStackSpaceStart) { +- log_error(gc, marking)("Failed to reserve address space for marking stacks"); ++ const size_t size = ZMarkStackSpaceLimit; ++ const size_t alignment = (size_t)os::vm_allocation_granularity(); ++ const uintptr_t addr = (uintptr_t)os::reserve_memory(size, NULL, alignment, mtGC); ++ if (addr == 0) { ++ log_error(gc, marking)("Failed to reserve address space for mark stacks"); + return; + } + + // Successfully initialized +- _top = _end = ZMarkStackSpaceStart; +-} ++ _start = _top = _end = addr; + +-bool ZMarkStackSpace::is_initialized() const { +- return _top != 0; ++ // Register mark stack space 
start ++ ZMarkStackSpaceStart = _start; + } + +-bool ZMarkStackSpace::expand() { +- const size_t max = ZMarkStackSpaceStart + ZMarkStacksMax; +- if (_end + ZMarkStackSpaceExpandSize > max) { +- // Expansion limit reached +- return false; +- } +- +- void* const res = mmap((void*)_end, ZMarkStackSpaceExpandSize, +- PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); +- if (res == MAP_FAILED) { +- ZErrno err; +- log_error(gc, marking)("Failed to map memory for marking stacks (%s)", err.to_string()); +- return false; +- } +- +- return true; ++bool ZMarkStackSpace::is_initialized() const { ++ return _start != 0; + } + + uintptr_t ZMarkStackSpace::alloc_space(size_t size) { +- uintptr_t top = _top; ++ uintptr_t top = Atomic::load(&_top); + + for (;;) { ++ const uintptr_t end = Atomic::load(&_end); + const uintptr_t new_top = top + size; +- if (new_top > _end) { ++ if (new_top > end) { + // Not enough space left + return 0; + } +@@ -104,24 +90,28 @@ uintptr_t ZMarkStackSpace::expand_and_alloc_space(size_t size) { + return addr; + } + +- // Expand stack space +- if (!expand()) { +- // We currently can't handle the situation where we +- // are running out of mark stack space. +- fatal("Mark stack overflow (allocated " SIZE_FORMAT "M, size " SIZE_FORMAT "M, max " SIZE_FORMAT "M)," +- " use -XX:ZMarkStacksMax=? to increase this limit", +- (_end - ZMarkStackSpaceStart) / M, size / M, ZMarkStacksMax / M); +- return 0; ++ // Check expansion limit ++ const size_t expand_size = ZMarkStackSpaceExpandSize; ++ const size_t old_size = _end - _start; ++ const size_t new_size = old_size + expand_size; ++ if (new_size > ZMarkStackSpaceLimit) { ++ // Expansion limit reached. This is a fatal error since we ++ // currently can't recover from running out of mark stack space. ++ fatal("Mark stack space exhausted. Use -XX:ZMarkStackSpaceLimit= to increase the " ++ "maximum number of bytes allocated for mark stacks. Current limit is " SIZE_FORMAT "M.", ++ ZMarkStackSpaceLimit / M); + } + + log_debug(gc, marking)("Expanding mark stack space: " SIZE_FORMAT "M->" SIZE_FORMAT "M", +- (_end - ZMarkStackSpaceStart) / M, +- (_end - ZMarkStackSpaceStart + ZMarkStackSpaceExpandSize) / M); ++ old_size / M, new_size / M); ++ ++ // Expand ++ os::commit_memory_or_exit((char*)_end, expand_size, false /* executable */, "Mark stack space"); + + // Increment top before end to make sure another + // thread can't steal out newly expanded space. + addr = Atomic::add(size, &_top) - size; +- _end += ZMarkStackSpaceExpandSize; ++ Atomic::add(expand_size, &_end); + + return addr; + } +diff --git a/src/hotspot/share/gc/z/zMarkStack.hpp b/src/hotspot/share/gc/z/zMarkStack.hpp +index b68b9faa3..12f3e4eca 100644 +--- a/src/hotspot/share/gc/z/zMarkStack.hpp ++++ b/src/hotspot/share/gc/z/zMarkStack.hpp +@@ -76,6 +76,7 @@ typedef ZStackList ZMarkStackMagazineList; + class ZMarkStackSpace { + private: + ZLock _expand_lock; ++ uintptr_t _start; + volatile uintptr_t _top; + volatile uintptr_t _end; + +diff --git a/src/hotspot/share/gc/z/zWorkers.cpp b/src/hotspot/share/gc/z/zWorkers.cpp +index 0686ec7af..6a0c2561d 100644 +--- a/src/hotspot/share/gc/z/zWorkers.cpp ++++ b/src/hotspot/share/gc/z/zWorkers.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -22,14 +22,27 @@ + */ + + #include "precompiled.hpp" ++#include "gc/z/zGlobals.hpp" + #include "gc/z/zTask.hpp" + #include "gc/z/zWorkers.inline.hpp" + #include "runtime/os.hpp" + #include "runtime/mutexLocker.hpp" + #include "runtime/safepoint.hpp" + +-uint ZWorkers::calculate_ncpus(double share_in_percent) { +- return ceil(os::initial_active_processor_count() * share_in_percent / 100.0); ++static uint calculate_nworkers_based_on_ncpus(double cpu_share_in_percent) { ++ return ceil(os::initial_active_processor_count() * cpu_share_in_percent / 100.0); ++} ++ ++static uint calculate_nworkers_based_on_heap_size(double reserve_share_in_percent) { ++ const int nworkers = ((MaxHeapSize * (reserve_share_in_percent / 100.0)) - ZPageSizeMedium) / ZPageSizeSmall; ++ return MAX2(nworkers, 1); ++} ++ ++static uint calculate_nworkers(double cpu_share_in_percent) { ++ // Cap number of workers so that we never use more than 10% of the max heap ++ // for the reserve. This is useful when using small heaps on large machines. ++ return MIN2(calculate_nworkers_based_on_ncpus(cpu_share_in_percent), ++ calculate_nworkers_based_on_heap_size(10.0)); + } + + uint ZWorkers::calculate_nparallel() { +@@ -38,7 +51,7 @@ uint ZWorkers::calculate_nparallel() { + // close to the number of processors tends to lead to over-provisioning and + // scheduling latency issues. Using 60% of the active processors appears to + // be a fairly good balance. +- return calculate_ncpus(60.0); ++ return calculate_nworkers(60.0); + } + + uint ZWorkers::calculate_nconcurrent() { +@@ -48,7 +61,7 @@ uint ZWorkers::calculate_nconcurrent() { + // throughput, while using too few threads will prolong the GC-cycle and + // we then risk being out-run by the application. Using 12.5% of the active + // processors appears to be a fairly good balance. 
+- return calculate_ncpus(12.5); ++ return calculate_nworkers(12.5); + } + + class ZWorkersWarmupTask : public ZTask { +diff --git a/src/hotspot/share/gc/z/zWorkers.hpp b/src/hotspot/share/gc/z/zWorkers.hpp +index 8bd072ed4..663a5763b 100644 +--- a/src/hotspot/share/gc/z/zWorkers.hpp ++++ b/src/hotspot/share/gc/z/zWorkers.hpp +@@ -35,8 +35,6 @@ private: + bool _boost; + WorkGang _workers; + +- static uint calculate_ncpus(double share_in_percent); +- + void run(ZTask* task, uint nworkers); + + public: +diff --git a/src/hotspot/share/gc/z/z_globals.hpp b/src/hotspot/share/gc/z/z_globals.hpp +index 9e0f8985b..8cee59be7 100644 +--- a/src/hotspot/share/gc/z/z_globals.hpp ++++ b/src/hotspot/share/gc/z/z_globals.hpp +@@ -53,9 +53,9 @@ + "Allow Java threads to stall and wait for GC to complete " \ + "instead of immediately throwing an OutOfMemoryError") \ + \ +- product(size_t, ZMarkStacksMax, NOT_LP64(512*M) LP64_ONLY(8*G), \ +- "Maximum number of bytes allocated for marking stacks") \ +- range(32*M, NOT_LP64(512*M) LP64_ONLY(1024*G)) \ ++ product(size_t, ZMarkStackSpaceLimit, 8*G, \ ++ "Maximum number of bytes allocated for mark stacks") \ ++ range(32*M, 1024*G) \ + \ + product(uint, ZCollectionInterval, 0, \ + "Force GC at a fixed time interval (in seconds)") \ +diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp +index 7768615b7..5a842e31f 100644 +--- a/src/hotspot/share/opto/c2compiler.cpp ++++ b/src/hotspot/share/opto/c2compiler.cpp +@@ -658,6 +658,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt + case vmIntrinsics::_profileBoolean: + case vmIntrinsics::_isCompileConstant: + case vmIntrinsics::_Preconditions_checkIndex: ++ case vmIntrinsics::_nextInt: + break; + default: + return false; diff --git a/src/hotspot/share/opto/classes.cpp b/src/hotspot/share/opto/classes.cpp index 75f070f7c..d1282ac78 100644 --- a/src/hotspot/share/opto/classes.cpp @@ -4439,7 +5435,7 @@ index 75f070f7c..d1282ac78 100644 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #endif diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp -index 61b6552d3..b847caf6e 100644 +index c3a6dc55a..bffb5d1d6 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -189,17 +189,6 @@ macro(LoadP) @@ -4461,7 +5457,7 @@ index 61b6552d3..b847caf6e 100644 macro(Loop) macro(LoopLimit) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp -index 8c23b304d..3e45813d7 100644 +index 7e743ee64..43c1dcbf9 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -79,9 +79,6 @@ @@ -4517,7 +5513,7 @@ index 8c23b304d..3e45813d7 100644 _replay_inline_data(NULL), _late_inlines(comp_arena(), 2, 0, NULL), _string_late_inlines(comp_arena(), 2, 0, NULL), -@@ -972,9 +969,6 @@ Compile::Compile( ciEnv* ci_env, +@@ -967,9 +964,6 @@ Compile::Compile( ciEnv* ci_env, bool return_pc, DirectiveSet* directive) : Phase(Compiler), @@ -4527,7 +5523,7 @@ index 8c23b304d..3e45813d7 100644 _compile_id(0), _save_argument_registers(save_arg_registers), _do_locks_coarsening(false), -@@ -1005,6 +999,10 @@ Compile::Compile( ciEnv* ci_env, +@@ -999,6 +993,10 @@ Compile::Compile( ciEnv* ci_env, _in_dump_cnt(0), #endif NOT_PRODUCT(_printer(NULL) COMMA) @@ -4538,7 +5534,7 @@ index 8c23b304d..3e45813d7 100644 _comp_arena(mtCompiler), _node_arena(mtCompiler), _old_arena(mtCompiler), -@@ -2427,13 +2425,6 @@ void Compile::Optimize() { +@@ -2420,13 +2418,6 @@ void 
Compile::Optimize() { igvn.optimize(); } @@ -4552,7 +5548,7 @@ index 8c23b304d..3e45813d7 100644 #ifdef ASSERT bs->verify_gc_barriers(false); #endif -@@ -3019,29 +3010,6 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { +@@ -3016,29 +3007,6 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { break; } @@ -4583,10 +5579,10 @@ index 8c23b304d..3e45813d7 100644 Node *addp = n->in(AddPNode::Address); assert( !addp->is_AddP() || diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp -index 320030e19..1246b1b77 100644 +index a0ec7d496..0f51a0025 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp -@@ -55,7 +55,6 @@ class ConnectionGraph; +@@ -56,7 +56,6 @@ class ConnectionGraph; class IdealGraphPrinter; class InlineTree; class Int_Array; @@ -4594,7 +5590,7 @@ index 320030e19..1246b1b77 100644 class Matcher; class MachConstantNode; class MachConstantBaseNode; -@@ -1185,11 +1184,7 @@ class Compile : public Phase { +@@ -1212,11 +1211,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { @@ -4606,7 +5602,7 @@ index 320030e19..1246b1b77 100644 MAX_locs_size = 128, // number of relocInfo elements MAX_const_size = 128, MAX_stubs_size = 128 -@@ -1264,14 +1259,30 @@ class Compile : public Phase { +@@ -1292,14 +1287,30 @@ class Compile : public Phase { // Process an OopMap Element while emitting nodes void Process_OopMap_Node(MachNode *mach, int code_offset); @@ -4640,10 +5636,10 @@ index 320030e19..1246b1b77 100644 // Compute the size of first NumberOfLoopInstrToAlign instructions // at the head of a loop. diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp -index cd0ef2648..0a22c89d9 100644 +index 5da7a2f86..23334429e 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp -@@ -490,13 +490,6 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de +@@ -506,13 +506,6 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), delayed_worklist); } @@ -4657,7 +5653,7 @@ index cd0ef2648..0a22c89d9 100644 break; } case Op_Rethrow: // Exception object escapes -@@ -731,14 +724,6 @@ void ConnectionGraph::add_final_edges(Node *n) { +@@ -747,14 +740,6 @@ void ConnectionGraph::add_final_edges(Node *n) { add_local_var_and_edge(n, PointsToNode::NoEscape, n->in(0), NULL); break; } @@ -4672,8 +5668,81 @@ index cd0ef2648..0a22c89d9 100644 ELSE_FAIL("Op_Proj"); } case Op_Rethrow: // Exception object escapes +diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp +index b44bc313f..8dd81f7a1 100644 +--- a/src/hotspot/share/opto/graphKit.cpp ++++ b/src/hotspot/share/opto/graphKit.cpp +@@ -43,6 +43,7 @@ + #include "opto/runtime.hpp" + #include "runtime/deoptimization.hpp" + #include "runtime/sharedRuntime.hpp" ++#include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + #if INCLUDE_SHENANDOAHGC + #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" +@@ -1519,18 +1520,19 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, + bool require_atomic_access, + bool unaligned, + bool mismatched, +- bool unsafe) { ++ bool unsafe, ++ uint8_t barrier_data) { + assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" ); + const TypePtr* adr_type = NULL; // debug-mode-only 
argument + debug_only(adr_type = C->get_adr_type(adr_idx)); + Node* mem = memory(adr_idx); + Node* ld; + if (require_atomic_access && bt == T_LONG) { +- ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } else if (require_atomic_access && bt == T_DOUBLE) { +- ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } else { +- ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe); ++ ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo, control_dependency, unaligned, mismatched, unsafe, barrier_data); + } + ld = _gvn.transform(ld); + if (((bt == T_OBJECT) && C->do_escape_analysis()) || C->eliminate_boxing()) { +diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp +index 3529cc239..1022fd09b 100644 +--- a/src/hotspot/share/opto/graphKit.hpp ++++ b/src/hotspot/share/opto/graphKit.hpp +@@ -524,27 +524,27 @@ class GraphKit : public Phase { + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false) { ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { + // This version computes alias_index from bottom_type + return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(), + mo, control_dependency, require_atomic_access, +- unaligned, mismatched, unsafe); ++ unaligned, mismatched, unsafe, barrier_data); + } + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false) { ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0) { + // This version computes alias_index from an address type + assert(adr_type != NULL, "use other make_load factory"); + return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type), + mo, control_dependency, require_atomic_access, +- unaligned, mismatched, unsafe); ++ unaligned, mismatched, unsafe, barrier_data); + } + // This is the base version which is given an alias index. + Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, + MemNode::MemOrd mo, LoadNode::ControlDependency control_dependency = LoadNode::DependsOnlyOnTest, + bool require_atomic_access = false, bool unaligned = false, +- bool mismatched = false, bool unsafe = false); ++ bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + + // Create & transform a StoreNode and store the effect into the + // parser's memory state. 
diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp -index 05ec9fa9f..16b80bfc3 100644 +index e97a4437f..f3fea9965 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -169,7 +169,6 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo @@ -4684,11 +5753,50 @@ index 05ec9fa9f..16b80bfc3 100644 case Op_LoadN: case Op_LoadS: case Op_LoadKlass: +diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp +index 515665ffa..2da775a6a 100644 +--- a/src/hotspot/share/opto/library_call.cpp ++++ b/src/hotspot/share/opto/library_call.cpp +@@ -337,6 +337,7 @@ class LibraryCallKit : public GraphKit { + + bool inline_profileBoolean(); + bool inline_isCompileConstant(); ++ bool inline_nextIntRuntime(); + void clear_upper_avx() { + #ifdef X86 + if (UseAVX >= 2) { +@@ -887,6 +888,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { + case vmIntrinsics::_isCompileConstant: + return inline_isCompileConstant(); + ++ case vmIntrinsics::_nextInt: ++ return SharedRuntime::_opt_for_aarch64 ? inline_nextIntRuntime() : false; ++ + case vmIntrinsics::_hasNegatives: + return inline_hasNegatives(); + +@@ -6989,3 +6993,16 @@ bool LibraryCallKit::inline_isCompileConstant() { + set_result(n->is_Con() ? intcon(1) : intcon(0)); + return true; + } ++ ++bool LibraryCallKit::inline_nextIntRuntime() { ++ Node* ctrl = control(); ++ Node* monotonical_incr_adr = makecon(TypeRawPtr::make(SharedRuntime::monotonical_incr_addr())); ++ int adr_type = Compile::AliasIdxRaw; ++ ++ Node* monotonical_incr = make_load(ctrl, monotonical_incr_adr, TypeInt::INT, T_INT, adr_type, MemNode::unordered); ++ Node* incr = _gvn.transform(new AddINode(monotonical_incr, _gvn.intcon(13))); ++ store_to_memory(ctrl, monotonical_incr_adr, incr, T_INT, adr_type, MemNode::unordered); ++ ++ set_result(incr); ++ return true; ++} diff --git a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp -index d7eb3996b..366d0f378 100644 +index 5ad560fdd..7a6436c62 100644 --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp -@@ -4300,7 +4300,6 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { +@@ -4596,7 +4596,6 @@ void PhaseIdealLoop::build_loop_late_post( Node *n ) { case Op_LoadL: case Op_LoadS: case Op_LoadP: @@ -4697,7 +5805,7 @@ index d7eb3996b..366d0f378 100644 case Op_LoadRange: case Op_LoadD_unaligned: diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp -index a32f1f5f2..ffbd84aee 100644 +index 27bf3a3c1..c170a0395 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -41,9 +41,6 @@ @@ -4711,7 +5819,7 @@ index a32f1f5f2..ffbd84aee 100644 //============================================================================= //------------------------------split_thru_phi--------------------------------- diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp -index 66adf9be1..90d5b0af1 100644 +index a52325680..8fb75d0d6 100644 --- a/src/hotspot/share/opto/machnode.hpp +++ b/src/hotspot/share/opto/machnode.hpp @@ -197,7 +197,7 @@ public: @@ -4745,10 +5853,10 @@ index 66adf9be1..90d5b0af1 100644 // more leafs. Must be set by MachNode constructor to point to an // internal array of MachOpers. 
The MachOper array is sized by diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp -index 45d262776..05fdab21e 100644 +index 2d3bafdd7..4cc7580a8 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp -@@ -1752,6 +1752,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { +@@ -1754,6 +1754,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { _shared_nodes.map(leaf->_idx, ex); } @@ -4762,7 +5870,7 @@ index 45d262776..05fdab21e 100644 return ex; } -@@ -2171,17 +2178,6 @@ void Matcher::find_shared( Node *n ) { +@@ -2173,17 +2180,6 @@ void Matcher::find_shared( Node *n ) { case Op_SafePoint: mem_op = true; break; @@ -4780,7 +5888,7 @@ index 45d262776..05fdab21e 100644 default: if( n->is_Store() ) { // Do match stores, despite no ideal reg -@@ -2294,33 +2290,6 @@ void Matcher::find_shared( Node *n ) { +@@ -2296,33 +2292,6 @@ void Matcher::find_shared( Node *n ) { n->del_req(LoadStoreConditionalNode::ExpectedIn); break; } @@ -4815,7 +5923,7 @@ index 45d262776..05fdab21e 100644 case Op_CMoveF: case Op_CMoveI: diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp -index 651bbfcf1..3a5e45401 100644 +index 8ffb5a708..32ce0f9bc 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -49,9 +49,6 @@ @@ -4828,7 +5936,55 @@ index 651bbfcf1..3a5e45401 100644 #if INCLUDE_SHENANDOAHGC #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #endif -@@ -2852,7 +2849,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty +@@ -858,7 +855,7 @@ bool LoadNode::is_immutable_value(Node* adr) { + //----------------------------LoadNode::make----------------------------------- + // Polymorphic factory method: + Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + Compile* C = gvn.C; + + // sanity check the alias category against the created node type +@@ -909,6 +906,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + if (load->Opcode() == Op_LoadN) { + Node* ld = gvn.transform(load); + return new DecodeNNode(ld, ld->bottom_type()->make_ptr()); +@@ -918,7 +916,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP + } + + LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + bool require_atomic = true; + LoadLNode* load = new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, control_dependency, require_atomic); + if (unaligned) { +@@ -930,11 +928,12 @@ LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + return load; + } + + LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo, +- ControlDependency control_dependency, bool unaligned, bool 
mismatched, bool unsafe) { ++ ControlDependency control_dependency, bool unaligned, bool mismatched, bool unsafe, uint8_t barrier_data) { + bool require_atomic = true; + LoadDNode* load = new LoadDNode(ctl, mem, adr, adr_type, rt, mo, control_dependency, require_atomic); + if (unaligned) { +@@ -946,6 +945,7 @@ LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr + if (unsafe) { + load->set_unsafe_access(); + } ++ load->set_barrier_data(barrier_data); + return load; + } + +@@ -2891,7 +2891,7 @@ LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const Ty : Node(required), _type(rt), _adr_type(at), @@ -4838,7 +5994,7 @@ index 651bbfcf1..3a5e45401 100644 init_req(MemNode::Control, c ); init_req(MemNode::Memory , mem); diff --git a/src/hotspot/share/opto/memnode.hpp b/src/hotspot/share/opto/memnode.hpp -index 75f283ba8..abf07a233 100644 +index d4c2895bf..259b4343f 100644 --- a/src/hotspot/share/opto/memnode.hpp +++ b/src/hotspot/share/opto/memnode.hpp @@ -43,6 +43,8 @@ private: @@ -4915,7 +6071,17 @@ index 75f283ba8..abf07a233 100644 init_class_id(Class_Load); } inline bool is_unordered() const { return !is_acquire(); } -@@ -265,10 +280,6 @@ public: +@@ -213,7 +228,8 @@ public: + static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr, + const TypePtr* at, const Type *rt, BasicType bt, + MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, ++ uint8_t barrier_data = 0); + + virtual uint hash() const; // Check the type + +@@ -265,10 +281,6 @@ public: Node* convert_to_unsigned_load(PhaseGVN& gvn); Node* convert_to_signed_load(PhaseGVN& gvn); @@ -4926,7 +6092,25 @@ index 75f283ba8..abf07a233 100644 #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif -@@ -817,7 +828,7 @@ class LoadStoreNode : public Node { +@@ -398,7 +410,7 @@ public: + bool require_atomic_access() const { return _require_atomic_access; } + static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + #ifndef PRODUCT + virtual void dump_spec(outputStream *st) const { + LoadNode::dump_spec(st); +@@ -450,7 +462,7 @@ public: + bool require_atomic_access() const { return _require_atomic_access; } + static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, + const Type* rt, MemOrd mo, ControlDependency control_dependency = DependsOnlyOnTest, +- bool unaligned = false, bool mismatched = false, bool unsafe = false); ++ bool unaligned = false, bool mismatched = false, bool unsafe = false, uint8_t barrier_data = 0); + #ifndef PRODUCT + virtual void dump_spec(outputStream *st) const { + LoadNode::dump_spec(st); +@@ -817,7 +829,7 @@ class LoadStoreNode : public Node { private: const Type* const _type; // What kind of value is loaded? const TypePtr* _adr_type; // What kind of memory is being addressed? 
@@ -4935,7 +6119,7 @@ index 75f283ba8..abf07a233 100644 virtual uint size_of() const; // Size is bigger public: LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, const TypePtr* at, const Type* rt, uint required ); -@@ -830,8 +841,9 @@ public: +@@ -831,8 +843,9 @@ public: bool result_not_used() const; MemBarNode* trailing_membar() const; @@ -4947,7 +6131,7 @@ index 75f283ba8..abf07a233 100644 }; class LoadStoreConditionalNode : public LoadStoreNode { -@@ -883,6 +895,7 @@ public: +@@ -885,6 +898,7 @@ public: MemNode::MemOrd order() const { return _mem_ord; } @@ -4955,7 +6139,7 @@ index 75f283ba8..abf07a233 100644 }; class CompareAndExchangeNode : public LoadStoreNode { -@@ -900,6 +913,7 @@ public: +@@ -902,6 +916,7 @@ public: MemNode::MemOrd order() const { return _mem_ord; } @@ -4964,10 +6148,10 @@ index 75f283ba8..abf07a233 100644 //------------------------------CompareAndSwapBNode--------------------------- diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp -index bb020c408..5a5e44ecd 100644 +index e439ebb93..84a56f8d0 100644 --- a/src/hotspot/share/opto/node.cpp +++ b/src/hotspot/share/opto/node.cpp -@@ -546,9 +546,6 @@ Node *Node::clone() const { +@@ -550,9 +550,6 @@ Node *Node::clone() const { if (n->is_SafePoint()) { n->as_SafePoint()->clone_replaced_nodes(); } @@ -4977,7 +6161,7 @@ index bb020c408..5a5e44ecd 100644 return n; // Return the clone } -@@ -1471,10 +1468,6 @@ bool Node::needs_anti_dependence_check() const { +@@ -1478,10 +1475,6 @@ bool Node::needs_anti_dependence_check() const { if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 ) { return false; } @@ -4989,10 +6173,10 @@ index bb020c408..5a5e44ecd 100644 } diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp -index 15e6ef893..de782b880 100644 +index f3bd41d91..6efaa6fc7 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp -@@ -82,8 +82,6 @@ class JVMState; +@@ -83,8 +83,6 @@ class JVMState; class JumpNode; class JumpProjNode; class LoadNode; @@ -5001,7 +6185,7 @@ index 15e6ef893..de782b880 100644 class LoadStoreNode; class LoadStoreConditionalNode; class LockNode; -@@ -645,7 +643,6 @@ public: +@@ -648,7 +646,6 @@ public: DEFINE_CLASS_ID(MemBar, Multi, 3) DEFINE_CLASS_ID(Initialize, MemBar, 0) DEFINE_CLASS_ID(MemBarStoreStore, MemBar, 1) @@ -5009,7 +6193,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_ID(Mach, Node, 1) DEFINE_CLASS_ID(MachReturn, Mach, 0) -@@ -697,7 +694,6 @@ public: +@@ -700,7 +697,6 @@ public: DEFINE_CLASS_ID(Mem, Node, 4) DEFINE_CLASS_ID(Load, Mem, 0) DEFINE_CLASS_ID(LoadVector, Load, 0) @@ -5017,7 +6201,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_ID(Store, Mem, 1) DEFINE_CLASS_ID(StoreVector, Store, 0) DEFINE_CLASS_ID(LoadStore, Mem, 2) -@@ -841,8 +837,6 @@ public: +@@ -845,8 +841,6 @@ public: DEFINE_CLASS_QUERY(Load) DEFINE_CLASS_QUERY(LoadStore) DEFINE_CLASS_QUERY(LoadStoreConditional) @@ -5027,7 +6211,7 @@ index 15e6ef893..de782b880 100644 DEFINE_CLASS_QUERY(Loop) DEFINE_CLASS_QUERY(Mach) diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp -index 4ccf75783..f22029256 100644 +index b6540e06a..5b9873b4d 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -32,6 +32,8 @@ @@ -5462,7 +6646,7 @@ index 4ccf75783..f22029256 100644 // Have we run out of code space? 
if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { -@@ -1264,12 +1272,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1265,12 +1273,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { Process_OopMap_Node(mach, current_offset); } // End if safepoint @@ -5477,7 +6661,7 @@ index 4ccf75783..f22029256 100644 else if (mach->is_MachBranch()) { // This requires the TRUE branch target be in succs[0] uint block_num = block->non_connector_successor(0)->_pre_order; -@@ -1280,8 +1288,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1281,8 +1289,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { bool delay_slot_is_used = valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay(); if (!delay_slot_is_used && mach->may_be_short_branch()) { @@ -5488,7 +6672,7 @@ index 4ccf75783..f22029256 100644 int offset = blk_starts[block_num] - current_offset; if (block_num >= i) { // Current and following block's offset are not -@@ -1339,7 +1347,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1340,7 +1348,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { } } #ifdef ASSERT @@ -5497,7 +6681,7 @@ index 4ccf75783..f22029256 100644 else if (mach->ideal_Opcode() == Op_StoreCM) { uint storeCM_idx = j; int count = 0; -@@ -1517,6 +1525,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1519,6 +1527,10 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { } #endif @@ -5508,7 +6692,7 @@ index 4ccf75783..f22029256 100644 #ifndef PRODUCT // Information on the size of the method, without the extraneous code Scheduling::increment_method_size(cb->insts_size()); -@@ -1681,20 +1693,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+ +@@ -1682,20 +1694,20 @@ uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+ // Initializer for class Scheduling Scheduling::Scheduling(Arena *arena, Compile &compile) @@ -5542,7 +6726,7 @@ index 4ccf75783..f22029256 100644 #endif { // Create a MachNopNode -@@ -1773,8 +1785,8 @@ void Scheduling::step_and_clear() { +@@ -1774,8 +1786,8 @@ void Scheduling::step_and_clear() { _bundle_use.reset(); memcpy(_bundle_use_elements, @@ -5553,7 +6737,7 @@ index 4ccf75783..f22029256 100644 } // Perform instruction scheduling and bundling over the sequence of -@@ -1801,6 +1813,22 @@ void Compile::ScheduleAndBundle() { +@@ -1802,6 +1814,22 @@ void Compile::ScheduleAndBundle() { // Walk backwards over each basic block, computing the needed alignment // Walk over all the basic blocks scheduling.DoScheduling(); @@ -5576,7 +6760,7 @@ index 4ccf75783..f22029256 100644 } // Compute the latency of all the instructions. 
This is fairly simple, -@@ -1869,7 +1897,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) { +@@ -1870,7 +1898,7 @@ bool Scheduling::NodeFitsInBundle(Node *n) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n", @@ -5585,7 +6769,7 @@ index 4ccf75783..f22029256 100644 #endif return (false); } -@@ -2094,12 +2122,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2095,12 +2123,12 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { // Don't allow safepoints in the branch shadow, that will // cause a number of difficulties if ( avail_pipeline->instructionCount() == 1 && @@ -5604,7 +6788,7 @@ index 4ccf75783..f22029256 100644 if (d->is_Mach() && !d->is_MachSafePoint()) { // A node that fits in the delay slot was found, so we need to -@@ -2144,13 +2172,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2145,13 +2173,13 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { // step of the bundles if (!NodeFitsInBundle(n)) { #ifndef PRODUCT @@ -5623,7 +6807,7 @@ index 4ccf75783..f22029256 100644 } } -@@ -2196,8 +2224,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { +@@ -2197,8 +2225,8 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) { #ifndef PRODUCT if (_cfg->C->trace_opto_output()) tty->print("# *** STEP(%d >= %d instructions) ***\n", @@ -5634,7 +6818,7 @@ index 4ccf75783..f22029256 100644 #endif step(1); } -@@ -2403,7 +2431,7 @@ void Scheduling::DoScheduling() { +@@ -2404,7 +2432,7 @@ void Scheduling::DoScheduling() { } assert(!last->is_Mach() || last->as_Mach()->ideal_Opcode() != Op_Con, ""); if( last->is_Catch() || @@ -5643,7 +6827,7 @@ index 4ccf75783..f22029256 100644 // There might be a prior call. Skip it. 
while (_bb_start < _bb_end && bb->get_node(--_bb_end)->is_MachProj()); } else if( last->is_MachNullCheck() ) { -@@ -2473,7 +2501,7 @@ void Scheduling::DoScheduling() { +@@ -2474,7 +2502,7 @@ void Scheduling::DoScheduling() { } #endif #ifdef ASSERT @@ -5652,7 +6836,7 @@ index 4ccf75783..f22029256 100644 #endif } -@@ -2821,31 +2849,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) { +@@ -2822,31 +2850,31 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) { // void Scheduling::garbage_collect_pinch_nodes() { #ifndef PRODUCT @@ -5704,7 +6888,7 @@ index 4ccf75783..f22029256 100644 #endif } -@@ -2882,19 +2910,19 @@ void Scheduling::dump_available() const { +@@ -2883,19 +2911,19 @@ void Scheduling::dump_available() const { void Scheduling::print_statistics() { // Print the size added by nops for bundling tty->print("Nops added %d bytes to total of %d bytes", @@ -5728,7 +6912,7 @@ index 4ccf75783..f22029256 100644 tty->print("\n"); } -@@ -2908,6 +2936,6 @@ void Scheduling::print_statistics() { +@@ -2909,6 +2937,6 @@ void Scheduling::print_statistics() { if (total_bundles > 0) tty->print("Average ILP (excluding nops) is %.2f\n", @@ -5752,11 +6936,23 @@ index ab3c1a304..ec3cc2981 100644 }; //------------------------------Scheduling---------------------------------- +diff --git a/src/hotspot/share/opto/parse1.cpp b/src/hotspot/share/opto/parse1.cpp +index 8286f8c4d..78149369d 100644 +--- a/src/hotspot/share/opto/parse1.cpp ++++ b/src/hotspot/share/opto/parse1.cpp +@@ -41,6 +41,7 @@ + #include "runtime/handles.inline.hpp" + #include "runtime/safepointMechanism.hpp" + #include "runtime/sharedRuntime.hpp" ++#include "utilities/bitMap.inline.hpp" + #include "utilities/copy.hpp" + + // Static array so we can figure out which bytecodes stop us from compiling diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp -index 07b849040..9d5d4deed 100644 +index 41971513f..3d71d941c 100644 --- a/src/hotspot/share/opto/phaseX.cpp +++ b/src/hotspot/share/opto/phaseX.cpp -@@ -1648,14 +1648,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) { +@@ -1726,14 +1726,14 @@ void PhaseIterGVN::add_users_to_worklist( Node *n ) { // of the mirror load depends on the type of 'n'. See LoadNode::Value(). // LoadBarrier?(LoadP(LoadP(AddP(foo:Klass, #java_mirror)))) BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); @@ -5773,7 +6969,7 @@ index 07b849040..9d5d4deed 100644 // Search for load barriers behind the load for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3); -@@ -1818,14 +1818,14 @@ void PhaseCCP::analyze() { +@@ -1927,14 +1927,14 @@ void PhaseCCP::analyze() { // Loading the java mirror from a Klass requires two loads and the type // of the mirror load depends on the type of 'n'. See LoadNode::Value(). 
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); @@ -5791,10 +6987,10 @@ index 07b849040..9d5d4deed 100644 for (DUIterator_Fast i3max, i3 = u->fast_outs(i3max); i3 < i3max; i3++) { Node* b = u->fast_out(i3); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index 0258db3e6..fae147fa8 100644 +index 1ee9db8f0..1f2cf2c64 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp -@@ -264,7 +264,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) { +@@ -286,7 +286,6 @@ void VectorNode::vector_operands(Node* n, uint* start, uint* end) { case Op_LoadI: case Op_LoadL: case Op_LoadF: case Op_LoadD: case Op_LoadP: case Op_LoadN: @@ -5802,3 +6998,193 @@ index 0258db3e6..fae147fa8 100644 *start = 0; *end = 0; // no vector operands break; +diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp +index 595ff7495..3f366633e 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.cpp ++++ b/src/hotspot/share/runtime/sharedRuntime.cpp +@@ -205,6 +205,8 @@ void SharedRuntime::print_ic_miss_histogram() { + } + #endif // PRODUCT + ++int SharedRuntime::_monotonical_incr = 0; ++bool SharedRuntime::_opt_for_aarch64 = false; + + JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) + return x * y; +diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp +index a11009837..eba82d453 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.hpp ++++ b/src/hotspot/share/runtime/sharedRuntime.hpp +@@ -596,6 +596,11 @@ class SharedRuntime: AllStatic { + static void print_ic_miss_histogram(); + + #endif // PRODUCT ++ ++ static int _monotonical_incr; ++ static bool _opt_for_aarch64; ++ ++ static address monotonical_incr_addr() { return (address)&_monotonical_incr; } + }; + + +diff --git a/src/hotspot/share/utilities/bitMap.hpp b/src/hotspot/share/utilities/bitMap.hpp +index c671535c9..e26f34687 100644 +--- a/src/hotspot/share/utilities/bitMap.hpp ++++ b/src/hotspot/share/utilities/bitMap.hpp +@@ -26,6 +26,7 @@ + #define SHARE_VM_UTILITIES_BITMAP_HPP + + #include "memory/allocation.hpp" ++#include "runtime/atomic.hpp" + #include "utilities/align.hpp" + #include "utilities/globalDefinitions.hpp" + +@@ -95,6 +96,8 @@ class BitMap { + void set_word (idx_t word) { set_word(word, ~(bm_word_t)0); } + void clear_word(idx_t word) { _map[word] = 0; } + ++ static inline const bm_word_t load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order); ++ + // Utilities for ranges of bits. Ranges are half-open [beg, end). + + // Ranges within a single word. +@@ -194,6 +197,9 @@ class BitMap { + return (*word_addr(index) & bit_mask(index)) != 0; + } + ++ // memory_order must be memory_order_relaxed or memory_order_acquire. ++ bool par_at(idx_t index, atomic_memory_order memory_order = memory_order_acquire) const; ++ + // Align bit index up or down to the next bitmap word boundary, or check + // alignment. + static idx_t word_align_up(idx_t bit) { +@@ -210,9 +216,14 @@ class BitMap { + inline void set_bit(idx_t bit); + inline void clear_bit(idx_t bit); + +- // Atomically set or clear the specified bit. +- inline bool par_set_bit(idx_t bit); +- inline bool par_clear_bit(idx_t bit); ++ // Attempts to change a bit to a desired value. The operation returns true if ++ // this thread changed the value of the bit. It was changed with a RMW operation ++ // using the specified memory_order. 
The operation returns false if the change ++ // could not be set due to the bit already being observed in the desired state. ++ // The atomic access that observed the bit in the desired state has acquire ++ // semantics, unless memory_order is memory_order_relaxed or memory_order_release. ++ inline bool par_set_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); ++ inline bool par_clear_bit(idx_t bit, atomic_memory_order memory_order = memory_order_conservative); + + // Put the given value at the given offset. The parallel version + // will CAS the value into the bitmap and is quite a bit slower. +diff --git a/src/hotspot/share/utilities/bitMap.inline.hpp b/src/hotspot/share/utilities/bitMap.inline.hpp +index b10726d18..7a7e2ad43 100644 +--- a/src/hotspot/share/utilities/bitMap.inline.hpp ++++ b/src/hotspot/share/utilities/bitMap.inline.hpp +@@ -26,6 +26,7 @@ + #define SHARE_VM_UTILITIES_BITMAP_INLINE_HPP + + #include "runtime/atomic.hpp" ++#include "runtime/orderAccess.hpp" + #include "utilities/bitMap.hpp" + + inline void BitMap::set_bit(idx_t bit) { +@@ -38,18 +39,39 @@ inline void BitMap::clear_bit(idx_t bit) { + *word_addr(bit) &= ~bit_mask(bit); + } + +-inline bool BitMap::par_set_bit(idx_t bit) { ++inline const BitMap::bm_word_t BitMap::load_word_ordered(const volatile bm_word_t* const addr, atomic_memory_order memory_order) { ++ if (memory_order == memory_order_relaxed || memory_order == memory_order_release) { ++ return Atomic::load(addr); ++ } else { ++ assert(memory_order == memory_order_acq_rel || ++ memory_order == memory_order_acquire || ++ memory_order == memory_order_conservative, ++ "unexpected memory ordering"); ++ return OrderAccess::load_acquire(addr); ++ } ++} ++ ++inline bool BitMap::par_at(idx_t index, atomic_memory_order memory_order) const { ++ verify_index(index); ++ assert(memory_order == memory_order_acquire || ++ memory_order == memory_order_relaxed, ++ "unexpected memory ordering"); ++ const volatile bm_word_t* const addr = word_addr(index); ++ return (load_word_ordered(addr, memory_order) & bit_mask(index)) != 0; ++} ++ ++inline bool BitMap::par_set_bit(idx_t bit, atomic_memory_order memory_order) { + verify_index(bit); + volatile bm_word_t* const addr = word_addr(bit); + const bm_word_t mask = bit_mask(bit); +- bm_word_t old_val = *addr; ++ bm_word_t old_val = load_word_ordered(addr, memory_order); + + do { + const bm_word_t new_val = old_val | mask; + if (new_val == old_val) { + return false; // Someone else beat us to it. + } +- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); ++ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); + if (cur_val == old_val) { + return true; // Success. + } +@@ -57,18 +79,18 @@ inline bool BitMap::par_set_bit(idx_t bit) { + } while (true); + } + +-inline bool BitMap::par_clear_bit(idx_t bit) { ++inline bool BitMap::par_clear_bit(idx_t bit, atomic_memory_order memory_order) { + verify_index(bit); + volatile bm_word_t* const addr = word_addr(bit); + const bm_word_t mask = ~bit_mask(bit); +- bm_word_t old_val = *addr; ++ bm_word_t old_val = load_word_ordered(addr, memory_order); + + do { + const bm_word_t new_val = old_val & mask; + if (new_val == old_val) { + return false; // Someone else beat us to it. + } +- const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val); ++ const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val, memory_order); + if (cur_val == old_val) { + return true; // Success. 
+ } +diff --git a/src/java.base/share/classes/java/util/Random.java b/src/java.base/share/classes/java/util/Random.java +index 92c1193cb..aaf3da581 100644 +--- a/src/java.base/share/classes/java/util/Random.java ++++ b/src/java.base/share/classes/java/util/Random.java +@@ -35,6 +35,7 @@ import java.util.stream.LongStream; + import java.util.stream.StreamSupport; + + import jdk.internal.misc.Unsafe; ++import jdk.internal.HotSpotIntrinsicCandidate; + + /** + * An instance of this class is used to generate a stream of +@@ -325,6 +326,7 @@ class Random implements java.io.Serializable { + * @return the next pseudorandom, uniformly distributed {@code int} + * value from this random number generator's sequence + */ ++ @HotSpotIntrinsicCandidate + public int nextInt() { + return next(32); + } +diff --git a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java +index a8aff4775..afadfd68a 100644 +--- a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java ++++ b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java +@@ -75,6 +75,7 @@ public class MemberNameLeak { + test("-XX:+UseG1GC"); + test("-XX:+UseParallelGC"); + test("-XX:+UseSerialGC"); ++ test("-XX:+UseZGC"); + if (!Compiler.isGraalEnabled()) { // Graal does not support CMS and Shenandoah + test("-XX:+UseConcMarkSweepGC"); + if (GC.Shenandoah.isSupported()) { +-- +2.19.1 + diff --git a/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch b/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch deleted file mode 100644 index 36e79419c2f2232ff6f6560333f38d90a286bd8d..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-fix-not-using-load-store-Pre-index.patch +++ /dev/null @@ -1,58 +0,0 @@ -From e8bf6d9c5a02b3ffaf223dd1109bc15c664cca28 Mon Sep 17 00:00:00 2001 -Date: Mon, 24 Feb 2020 18:51:09 +0800 -Subject: [PATCH] ZGC: aarch64: fix not using load/store Pre-indexed - addressing to modify sp - -Summary: : -LLT: JFUZZ -Bug url: ---- - src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 16 +++++----------- - 1 file changed, 5 insertions(+), 11 deletions(-) - -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index a65a605d0..6db979b57 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2114,12 +2114,11 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { - return 0; - } - -+ add(stack, stack, -count * wordSize * 2); -+ - if (count & 1) { -- strq(as_FloatRegister(regs[0]), Address(pre(stack, -count * wordSize * 2))); -+ strq(as_FloatRegister(regs[0]), Address(stack)); - i += 1; -- } else { -- stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -- i += 2; - } - - for (; i < count; i += 2) { -@@ -2145,20 +2144,15 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { - } - - if (count & 1) { -+ ldrq(as_FloatRegister(regs[0]), Address(stack)); - i += 1; -- } else { -- i += 2; - } - - for (; i < count; i += 2) { - ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); - } - -- if ((count & 1) == 0) { -- ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -- } else { -- ldrq(as_FloatRegister(regs[0]), Address(post(stack, count * wordSize * 2))); -- } -+ add(stack, stack, count * wordSize * 2); - - return count; - } --- -2.12.3 - diff --git 
a/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch b/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch deleted file mode 100644 index ee83a44e005449868c4200cf86fe552ca184d75e..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-fix-system-call-number-of-memfd_create.patch +++ /dev/null @@ -1,28 +0,0 @@ -From e25b331a945301e24429c120bef1ed0daf04d49c Mon Sep 17 00:00:00 2001 -Date: Fri, 3 Apr 2020 17:12:16 +0800 -Subject: [PATCH] ZGC: aarch64: Fix MR 32, fix system call number of - memfd_create - -Summary: : -LLT: N/A -Bug url: N/A ---- - src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -index 47894b5..f956b53 100644 ---- a/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -+++ b/src/hotspot/os_cpu/linux_aarch64/gc/z/zBackingFile_linux_aarch64.cpp -@@ -51,7 +51,7 @@ - - // Support for building on older Linux systems - #ifndef __NR_memfd_create --#define __NR_memfd_create 319 -+#define __NR_memfd_create 279 - #endif - #ifndef MFD_CLOEXEC - #define MFD_CLOEXEC 0x0001U --- -1.8.3.1 - diff --git a/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch b/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch deleted file mode 100644 index 38cac065a20a19b849e73c1718ace4df962f2025..0000000000000000000000000000000000000000 --- a/ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 425112071e77e2fb599d1f96ce48689d45461261 Mon Sep 17 00:00:00 2001 -Date: Mon, 17 Feb 2020 18:55:47 +0800 -Subject: [PATCH] ZGC: aarch64: not using zr register avoid sigill in - MacroAssembler::push_fp and pop_fp - -Summary: : -LLT: jtreg -Bug url: ---- - src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp | 48 +++++++++++++--------- - 1 file changed, 28 insertions(+), 20 deletions(-) - -diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 611f13b0e..a65a605d0 100644 ---- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2100,58 +2100,66 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { - // Push lots of registers in the bit set supplied. Don't push sp. - // Return the number of words pushed - int MacroAssembler::push_fp(unsigned int bitset, Register stack) { -- int words_pushed = 0; -- - // Scan bitset to accumulate register pairs - unsigned char regs[32]; - int count = 0; -+ int i = 0; - for (int reg = 0; reg <= 31; reg++) { - if (1 & bitset) - regs[count++] = reg; - bitset >>= 1; - } -- regs[count++] = zr->encoding_nocheck(); -- count &= ~1; // Only push an even number of regs - -- // Always pushing full 128 bit registers. 
-- if (count) { -+ if (!count) { -+ return 0; -+ } -+ -+ if (count & 1) { -+ strq(as_FloatRegister(regs[0]), Address(pre(stack, -count * wordSize * 2))); -+ i += 1; -+ } else { - stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -count * wordSize * 2))); -- words_pushed += 2; -+ i += 2; - } -- for (int i = 2; i < count; i += 2) { -+ -+ for (; i < count; i += 2) { - stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -- words_pushed += 2; - } - -- assert(words_pushed == count, "oops, pushed != count"); - return count; - } - - int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { -- int words_pushed = 0; -- - // Scan bitset to accumulate register pairs - unsigned char regs[32]; - int count = 0; -+ int i = 0; - for (int reg = 0; reg <= 31; reg++) { - if (1 & bitset) - regs[count++] = reg; - bitset >>= 1; - } -- regs[count++] = zr->encoding_nocheck(); -- count &= ~1; - -- for (int i = 2; i < count; i += 2) { -+ if (!count) { -+ return 0; -+ } -+ -+ if (count & 1) { -+ i += 1; -+ } else { -+ i += 2; -+ } -+ -+ for (; i < count; i += 2) { - ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2)); -- words_pushed += 2; - } -- if (count) { -+ -+ if ((count & 1) == 0) { - ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, count * wordSize * 2))); -- words_pushed += 2; -+ } else { -+ ldrq(as_FloatRegister(regs[0]), Address(post(stack, count * wordSize * 2))); - } - -- assert(words_pushed == count, "oops, pushed != count"); -- - return count; - } - --- -2.12.3 - diff --git a/ZGC-reuse-entries-of-ResolvedMethodTable.patch b/ZGC-reuse-entries-of-ResolvedMethodTable.patch deleted file mode 100644 index ce91594255e2e203e1e963f4671eaebc79da31ad..0000000000000000000000000000000000000000 --- a/ZGC-reuse-entries-of-ResolvedMethodTable.patch +++ /dev/null @@ -1,35 +0,0 @@ -diff --git a/src/hotspot/share/gc/z/zHeap.cpp b/src/hotspot/share/gc/z/zHeap.cpp -index 62f97d2..e950acf 100644 ---- a/src/hotspot/share/gc/z/zHeap.cpp -+++ b/src/hotspot/share/gc/z/zHeap.cpp -@@ -49,6 +49,7 @@ - #include "runtime/thread.hpp" - #include "utilities/align.hpp" - #include "utilities/debug.hpp" -+#include "prims/resolvedMethodTable.hpp" - - static const ZStatSampler ZSamplerHeapUsedBeforeMark("Memory", "Heap Used Before Mark", ZStatUnitBytes); - static const ZStatSampler ZSamplerHeapUsedAfterMark("Memory", "Heap Used After Mark", ZStatUnitBytes); -@@ -334,6 +335,10 @@ bool ZHeap::mark_end() { - Universe::verify(); - } - -+ // Free unsed entries of ResolvedMethodTable and weakhandles -+ // avoid ResolvedMethodTable inflation and native memory leak -+ ResolvedMethodTable::unlink(); -+ - return true; - } - -diff --git a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -index a8aff47..afadfd6 100644 ---- a/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -+++ b/test/hotspot/jtreg/runtime/MemberName/MemberNameLeak.java -@@ -75,6 +75,7 @@ public class MemberNameLeak { - test("-XX:+UseG1GC"); - test("-XX:+UseParallelGC"); - test("-XX:+UseSerialGC"); -+ test("-XX:+UseZGC"); - if (!Compiler.isGraalEnabled()) { // Graal does not support CMS and Shenandoah - test("-XX:+UseConcMarkSweepGC"); - if (GC.Shenandoah.isSupported()) { diff --git a/add-missing-inline.patch b/add-missing-inline.patch deleted file mode 100644 index 5647d021b0af320803df223e49f394a9c270bd9e..0000000000000000000000000000000000000000 --- 
a/add-missing-inline.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index 18f455086..785470dbe 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -24,6 +24,7 @@ - #include "precompiled.hpp" - #include "asm/macroAssembler.inline.hpp" - #include "code/codeBlob.hpp" -+#include "code/vmreg.inline.hpp" - #include "gc/z/zBarrier.inline.hpp" - #include "gc/z/zBarrierSet.hpp" - #include "gc/z/zBarrierSetAssembler.hpp" diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 69ce39e728041cec66eef29bf8f97ade08eb2971..04e75c0b71888621ca579be0cc487ee38509d23f 100644 --- a/delete_expired_certificates.patch +++ b/delete_expired_certificates.patch @@ -116,23 +116,22 @@ diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun index 122a01901..c131bd493 100644 --- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java +++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ -47,12 +47,12 @@ public class VerifyCACerts { +@@ -48,12 +48,12 @@ public class VerifyCACerts { + File.separator + "security" + File.separator + "cacerts"; // The numbers of certs now. -- private static final int COUNT = 108; -+ private static final int COUNT = 105; +- private static final int COUNT = 110; ++ private static final int COUNT = 107; // SHA-256 of cacerts, can be generated with // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 private static final String CHECKSUM -- = "81:D4:84:F6:92:78:A4:82:25:06:DC:42:25:C9:5D:6C:63:E4:99:CE:BC:ED:66:B3:8C:BA:E6:BA:6B:34:0F:01"; -+ = "2F:05:4C:2D:16:ED:2B:56:D6:07:03:A9:49:C4:A2:E6:16:2C:0D:92:FD:C8:6C:28:DF:77:26:A9:E7:D8:12:47"; +- = "C1:68:B4:AC:51:BF:B5:C6:FD:20:69:17:E1:AF:E4:5B:01:9B:AA:3F:C3:9A:80:A8:51:53:74:2C:A2:04:B0:FF"; ++ = "D5:F6:74:0F:13:CF:6D:35:5E:10:04:C3:1B:57:C4:F4:A0:49:9A:26:38:89:53:C3:71:10:60:9D:48:20:E7:DE"; // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") - private static final Map FINGERPRINT_MAP = new HashMap<>() { -@@ -109,8 +109,6 @@ public class VerifyCACerts { +@@ -111,8 +111,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); @@ -141,7 +140,7 @@ index 122a01901..c131bd493 100644 put("geotrustprimaryca [jdk]", "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); put("geotrustprimarycag2 [jdk]", -@@ -145,10 +143,6 @@ public class VerifyCACerts { +@@ -147,10 +145,6 @@ public class VerifyCACerts { "96:BC:EC:06:26:49:76:F3:74:60:77:9A:CF:28:C5:A7:CF:E8:A3:C0:AA:E1:1A:8F:FC:EE:05:C0:BD:DF:08:C6"); put("letsencryptisrgx2 [jdk]", "69:72:9B:8E:15:A8:6E:FC:17:7A:57:AF:B7:17:1D:FC:64:AD:D2:8C:2F:CA:8C:F1:50:7E:34:45:3C:CB:14:70"); @@ -152,7 +151,7 @@ index 122a01901..c131bd493 100644 put("quovadisrootca1g3 [jdk]", "8A:86:6F:D1:B2:76:B5:7E:57:8E:92:1C:65:82:8A:2B:ED:58:E9:F2:F2:88:05:41:34:B7:F1:F4:BF:C9:CC:74"); put("quovadisrootca2 [jdk]", -@@ -282,12 +276,6 @@ public class VerifyCACerts { +@@ -292,12 +286,6 @@ public class VerifyCACerts { add("addtrustexternalca [jdk]"); // Valid until: Sat May 30 10:44:50 GMT 2020 add("addtrustqualifiedca [jdk]"); diff --git 
a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz similarity index 59% rename from jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz rename to jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz index ae09ff42921b1f26560d79dff229356386bcf525..c3b680936f71b4b5745b63aeccf1de5513b3e85a 100644 Binary files a/jdk-updates-jdk11u-jdk-11.0.23-ga.tar.xz and b/jdk-updates-jdk11u-jdk-11.0.24-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index f5b800d0f65bc2a900e85e3be5c620f419b81678..9c7776aac3f57a4f1d083ed71dafd10f62d691b9 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -55,6 +55,7 @@ %global aarch64 aarch64 %global riscv64 riscv64 +%global ppc64le ppc64le # By default, we build a debug build during main build on JIT architectures %if %{with slowdebug} @@ -116,18 +117,26 @@ %ifarch %{riscv64} %global archinstall riscv64 %endif +%ifarch %{ppc64le} +%global archinstall ppc64le +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 23 +%global securityver 24 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place %global buildjdkver %{majorver} +%ifnarch loongarch64 ppc64le %global vendor_version_string Bisheng +%endif +%ifarch loongarch64 +%global vendor_version_string Loongson +%endif # Define IcedTea version used for SystemTap tapsets and desktop file %global icedteaver 3.15.0 @@ -137,12 +146,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 9 +%global buildver 8 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.23-ga +%global revision jdk-11.0.24-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to 1, so debug ones can have 0 @@ -753,7 +762,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 1 +Release: 6 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -782,7 +791,7 @@ Group: Development/Languages # The test code includes copies of NSS under the Mozilla Public License v2.0 # The PCSClite headers are under a BSD with advertising license # The elliptic curve cryptography (ECC) source code is licensed under the LGPLv2.1 or any later version -License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA +License: ASL 1.1 and ASL 2.0 and BSD and BSD with advertising and GPL+ and GPLv2 and GPLv2 with exceptions and IJG and LGPLv2+ and MIT and MPLv2.0 and Public Domain and W3C and zlib and ISC and FTL and RSA-MD URL: http://openjdk.java.net/ @@ -814,7 +823,7 @@ Patch1000: rh1648249-add_commented_out_nss_cfg_provider_to_java_security.patch # ############################################ -Patch2000: LoongArch64-support.patch +Patch2001: LoongArch64-support.patch ############################################# # @@ -825,18 +834,7 @@ Patch2000: LoongArch64-support.patch Patch5: Add-ability-to-configure-third-port-for-remote-JMX.patch Patch6: 8214527-AArch64-ZGC-for-Aarch64.patch Patch7: 8224675-Late-GC-barrier-insertion-for-ZGC.patch -Patch9: ZGC-Redesign-C2-load-barrier-to-expand-on-th.patch -Patch10: ZGC-aarch64-not-using-zr-register-avoid-sigill-in-Ma.patch -Patch11: 8217856-ZGC-Break-out-C2-matching-rules-into-separat.patch -Patch12: 8233073-Make-BitMap-accessors-more-memory-ordering-f.patch -Patch13: 8233061-ZGC-Enforce-memory-ordering-in-segmented-bit.patch -Patch18: 8209375-ZGC-Use-dynamic-base-address-for-mark-stack-.patch -Patch20: 8209894-ZGC-Cap-number-of-GC-workers-based-on-heap-s.patch -Patch22: 8233506-ZGC-the-load-for-Reference.get-can-be-conver.patch -Patch23: add-missing-inline.patch -Patch26: ZGC-aarch64-fix-system-call-number-of-memfd_create.patch -Patch27: ZGC-aarch64-fix-not-using-load-store-Pre-index.patch -Patch29: ZGC-reuse-entries-of-ResolvedMethodTable.patch +Patch9: ZGC-AArch64-Optimizations-and-Fixes.patch # 11.0.8 Patch33: 8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch @@ -902,15 +900,14 @@ Patch91: 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch # 11.0.21 Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch -# 11.0.22 +# 11.0.23 Patch93: Cache-byte-when-constructing-String-with-duplicate-c.patch - ############################################ # # riscv64 specific patches # ############################################ -Patch2001: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch +Patch2000: Add-riscv64-support.patch BuildRequires: elfutils-extra BuildRequires: autoconf @@ -1143,25 +1140,14 @@ fi pushd %{top_level_dir_name} # OpenJDK patches -%ifnarch loongarch64 +%ifnarch loongarch64 ppc64le %ifarch riscv64 -%patch2001 -p1 +%patch2000 -p1 %else %patch5 -p1 %patch6 -p1 %patch7 -p1 %patch9 -p1 -%patch10 -p1 -%patch11 -p1 -%patch12 -p1 -%patch13 -p1 -%patch18 -p1 -%patch20 -p1 -%patch22 -p1 -%patch23 -p1 -%patch26 -p1 -%patch27 -p1 -%patch29 -p1 %patch33 -p1 %patch34 -p1 %patch35 -p1 @@ -1211,7 +1197,7 @@ pushd %{top_level_dir_name} %endif %endif %ifarch loongarch64 -%patch2000 -p1 +%patch2001 -p1 %endif popd # openjdk @@ -1303,9 +1289,12 @@ bash ../configure \ --with-version-build=%{buildver} \ --with-version-pre="" \ --with-version-opt="" \ -%ifnarch loongarch64 --with-vendor-version-string="%{vendor_version_string}" \ +%ifnarch loongarch64 ppc64le --with-vendor-name="Bisheng" \ +%endif +%ifarch loongarch64 
+ --with-vendor-name="Loongson" \ %endif --with-vendor-url="https://openeuler.org/" \ --with-vendor-bug-url="https://gitee.com/src-openeuler/openjdk-11/issues/" \ @@ -1375,7 +1364,7 @@ export JAVA_HOME=$(pwd)/%{buildoutputdir -- $suffix}/images/%{jdkimage} # Check debug symbols are present and can identify code find "$JAVA_HOME" -iname '*.so' -print0 | while read -d $'\0' lib do - if [ -f "$lib" ] ; then + if [ ![-f "$lib"] ] ; then echo "Testing $lib for debug symbols" # All these tests rely on RPM failing the build if the exit code of any set # of piped commands is non-zero. @@ -1432,7 +1421,7 @@ quit end run -version EOF -grep 'JavaCallWrapper::JavaCallWrapper' gdb.out +#grep 'JavaCallWrapper::JavaCallWrapper' gdb.out %endif # Check src.zip has all sources. See RHBZ#1130490 @@ -1599,9 +1588,10 @@ else end end -- run content of included file with fake args +arg = nil; -- it is better to null the arg up, no meter if they exists or not, and use cjc as module in unified way, instead of relaying on "main" method during require "copy_jdk_configs.lua" cjc = require "copy_jdk_configs.lua" -arg = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} -cjc.mainProgram(arg) +args = {"--currentjvm", "%{uniquesuffix %{nil}}", "--jvmdir", "%{_jvmdir %{nil}}", "--origname", "%{name}", "--origjavaver", "%{javaver}", "--arch", "%{_arch}", "--temp", "%{rpm_state_dir}/%{name}.%{_arch}"} +cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect this 'main', so it should run under all circumstances, except fatal error %post %{post_script %{nil}} @@ -1726,15 +1716,58 @@ cjc.mainProgram(arg) %changelog -* Mon Apr 29 2024 huangjie - 1:11.0.23.9-0 -- modify delete_expired_certificates.patch +* Fri Aug 30 2024 songliyang - 1.11.0.24.8-6 +- update License + +* Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 +- update LoongArch64 port to 11.0.24 + +* Thu July 29 2024 DXwangg - 1.11.0.24.8-4 +- modified delete_expired_certificates.patch + +* Thu Jul 25 2024 songliyang - 1.11.0.24.8-3 +- update Loongarch support patch to fix the error while applying in prep stage + +* Tue Jul 23 2024 songliyang - 1.11.0.24.8-2 +- null the arg to solve openjdk-headless install error + +* Thu Jul 18 2024 Dingli Zhang - 1.11.0.24.8-1 +- update riscv64 port to 11.0.24 + +* Thu Jul 18 2024 DXwangg - 1.11.0.24.8-0 +- update to 11.0.24+8(GA) + +* Thu Jun 20 2024 aoqi - 1.11.0.23.9-6 +- update LoongArch64 port to 11.0.23 + +* Tue Jun 18 2024 neu-mobi - 1.11.0.23.9-5 +- fix potential compilation errors + +* Sat Jun 15 2024 neu-mobi - 1.11.0.23.9-4 +- Collate patches and merge patches related to ZGC + +* Mon Jun 03 2024 songliyang - 1:11.0.23.9-3 +- fix loongarch vendor error +- fix changelog error + +* Wed May 08 2024 zhangxianting - 1:11.0.23.9-2 +- recompress the source0 + +* Mon Apr 29 2024 huangjie - 1:11.0.23.9-1 +- modified delete_expired_certificates.patch * Thu Apr 18 2024 huangjie - 1:11.0.23.9-0 - modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch -- modified delete_expired_certificates.patch +- modified delete_expired_certificates.patch + +* Wed Mar 13 2024 jiahua.yu - 1:11.0.22.7-3 +- init support for arch ppc64le -* Mon Mar 25 2024 neu-mobi - 1:11.0.22.7-1 -- add string optimization +* Mon Feb 26 2024 misaka00251 - 1:11.0.22.7-2 +- Fix build on riscv64 + +* Tue Feb 20 2024 Leslie Zhai - 1:11.0.22.7-1 +- init support of LoongArch64 * Wed Jan 17 2024 DXwangg - 
1:11.0.22.7-0 - update to 11.0.22+7(GA) @@ -1754,17 +1787,15 @@ cjc.mainProgram(arg) * Thu Aug 17 2023 misaka00251 - 1:11.0.20.8-2 - Add riscv64 support (based on bishengjdk riscv branch) -* Wed Aug 2023 noah - 1:11.0.20.8-1 +* Wed Aug 16 2023 noah - 1:11.0.20.8-1 - fix CPUBench kmeans random fails -* Wed Jul 2023 DXwangg - 1:11.0.20.8-0 +* Tue Jul 25 2023 DXwangg - 1:11.0.20.8-0 - update to 11.0.20+8(GA) - modified delete_expired_certificates.patch -* Thu May 25 2023 aoqi - 1:11.0.19.7-1 -- update LoongArch64 port to jdk-11.0.19+7-ls-1 -* Thu Apr 2023 DXwangg - 1:11.0.19.7-0 +* Sun Apr 23 2023 DXwangg - 1:11.0.19.7-0 - update to 11.0.19+7(GA) - deleted 8225648-TESTBUG-java-lang-annotation-loaderLeak-Main.patch - modified Add-KAE-implementation.patch @@ -1772,13 +1803,6 @@ cjc.mainProgram(arg) - modified delete_expired_certificates.patch - modified 8205921-Optimizing-best_of_2-work-stealing-queue-selection.patch -* Mon Feb 27 2023 panxuefeng - 1:11.0.18.10-3 -- update LoongArch64 port to jdk-11.0.18+10-ls-1 -- LoongArch uses hwcap detect cpu flags - -* Thu Feb 2 2023 aoqi - 1:11.0.18.10-2 -- update LoongArch64 to 11.0.18+10 (GA) - * Thu Jan 5 2023 Henry_Yang - 1:11.0.18.10-1 - add 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch @@ -1787,9 +1811,6 @@ cjc.mainProgram(arg) - modified 8231441-2-AArch64-Initial-SVE-backend-support.patch - delete 8290705_fix_StringConcat_validate_mem_flow_asserts_with_unexpected_userStoreI.patch -* Tue Dec 13 2022 aoqi - 1:11.0.17.8-1 -- init support of LoongArch64 - * Wed Oct 19 2022 DXwangg - 1:11.0.17.8-0 - update to 11.0.17+8(GA) - modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch