diff --git a/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch b/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch new file mode 100644 index 0000000000000000000000000000000000000000..13815b71215d789e75976982277c6f8b05762627 --- /dev/null +++ b/2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch @@ -0,0 +1,61086 @@ +From 77eaf1804b7e56ed17a6c3a478e6ee9df89ea024 Mon Sep 17 00:00:00 2001 +From: misaka00251 +Date: Wed, 9 Aug 2023 02:24:23 +0800 +Subject: [PATCH] Add riscv64 support (based on bishengjdk riscv branch) + +--- + make/autoconf/build-aux/config.sub | 7 + + make/autoconf/hotspot.m4 | 3 +- + make/autoconf/libraries.m4 | 4 +- + make/autoconf/platform.m4 | 10 +- + make/hotspot/gensrc/GensrcAdlc.gmk | 16 +- + src/hotspot/cpu/aarch64/aarch64.ad | 40 +- + .../cpu/aarch64/c1_LIRAssembler_aarch64.cpp | 4 +- + .../cpu/aarch64/macroAssembler_aarch64.cpp | 64 + + .../cpu/aarch64/macroAssembler_aarch64.hpp | 3 + + src/hotspot/cpu/arm/arm.ad | 10 +- + src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp | 5 +- + src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp | 5 +- + src/hotspot/cpu/ppc/ppc.ad | 16 +- + .../cpu/riscv/abstractInterpreter_riscv.cpp | 185 + + src/hotspot/cpu/riscv/assembler_riscv.cpp | 365 + + src/hotspot/cpu/riscv/assembler_riscv.hpp | 2004 +++ + .../cpu/riscv/assembler_riscv.inline.hpp | 47 + + src/hotspot/cpu/riscv/bytes_riscv.hpp | 169 + + src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp | 352 + + src/hotspot/cpu/riscv/c1_Defs_riscv.hpp | 85 + + .../cpu/riscv/c1_FpuStackSim_riscv.cpp | 31 + + .../cpu/riscv/c1_FpuStackSim_riscv.hpp | 33 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp | 391 + + src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp | 149 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.cpp | 287 + + .../cpu/riscv/c1_LIRAssembler_arith_riscv.hpp | 36 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.cpp | 387 + + .../riscv/c1_LIRAssembler_arraycopy_riscv.hpp | 51 + + .../cpu/riscv/c1_LIRAssembler_riscv.cpp | 2275 ++++ + .../cpu/riscv/c1_LIRAssembler_riscv.hpp | 132 + + .../cpu/riscv/c1_LIRGenerator_riscv.cpp | 1083 ++ + src/hotspot/cpu/riscv/c1_LIR_riscv.cpp | 55 + + src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp | 33 + + src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp | 85 + + .../cpu/riscv/c1_MacroAssembler_riscv.cpp | 441 + + .../cpu/riscv/c1_MacroAssembler_riscv.hpp | 121 + + src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp | 1206 ++ + src/hotspot/cpu/riscv/c1_globals_riscv.hpp | 72 + + src/hotspot/cpu/riscv/c2_globals_riscv.hpp | 91 + + src/hotspot/cpu/riscv/c2_init_riscv.cpp | 38 + + src/hotspot/cpu/riscv/codeBuffer_riscv.hpp | 36 + + src/hotspot/cpu/riscv/compiledIC_riscv.cpp | 154 + + src/hotspot/cpu/riscv/copy_riscv.hpp | 60 + + src/hotspot/cpu/riscv/depChecker_riscv.hpp | 32 + + src/hotspot/cpu/riscv/disassembler_riscv.hpp | 37 + + src/hotspot/cpu/riscv/frame_riscv.cpp | 683 + + src/hotspot/cpu/riscv/frame_riscv.hpp | 200 + + src/hotspot/cpu/riscv/frame_riscv.inline.hpp | 257 + + .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 479 + + .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 78 + + .../gc/shared/barrierSetAssembler_riscv.cpp | 226 + + .../gc/shared/barrierSetAssembler_riscv.hpp | 75 + + .../cardTableBarrierSetAssembler_riscv.cpp | 120 + + .../cardTableBarrierSetAssembler_riscv.hpp | 43 + + .../modRefBarrierSetAssembler_riscv.cpp | 54 + + .../modRefBarrierSetAssembler_riscv.hpp | 55 + + .../c1/shenandoahBarrierSetC1_riscv.cpp | 124 + + .../shenandoahBarrierSetAssembler_riscv.cpp | 743 ++ + .../shenandoahBarrierSetAssembler_riscv.hpp | 92 + + 
.../riscv/gc/shenandoah/shenandoah_riscv64.ad | 188 + + .../cpu/riscv/globalDefinitions_riscv.hpp | 44 + + src/hotspot/cpu/riscv/globals_riscv.hpp | 120 + + src/hotspot/cpu/riscv/icBuffer_riscv.cpp | 79 + + src/hotspot/cpu/riscv/icache_riscv.cpp | 61 + + src/hotspot/cpu/riscv/icache_riscv.hpp | 42 + + src/hotspot/cpu/riscv/interp_masm_riscv.cpp | 1932 +++ + src/hotspot/cpu/riscv/interp_masm_riscv.hpp | 283 + + src/hotspot/cpu/riscv/interpreterRT_riscv.cpp | 296 + + src/hotspot/cpu/riscv/interpreterRT_riscv.hpp | 68 + + .../cpu/riscv/javaFrameAnchor_riscv.hpp | 89 + + .../cpu/riscv/jniFastGetField_riscv.cpp | 193 + + src/hotspot/cpu/riscv/jniTypes_riscv.hpp | 108 + + .../cpu/riscv/macroAssembler_riscv.cpp | 5861 +++++++++ + .../cpu/riscv/macroAssembler_riscv.hpp | 975 ++ + .../cpu/riscv/macroAssembler_riscv.inline.hpp | 30 + + src/hotspot/cpu/riscv/methodHandles_riscv.cpp | 440 + + src/hotspot/cpu/riscv/methodHandles_riscv.hpp | 58 + + src/hotspot/cpu/riscv/nativeInst_riscv.cpp | 404 + + src/hotspot/cpu/riscv/nativeInst_riscv.hpp | 561 + + src/hotspot/cpu/riscv/registerMap_riscv.hpp | 46 + + .../cpu/riscv/register_definitions_riscv.cpp | 193 + + src/hotspot/cpu/riscv/register_riscv.cpp | 69 + + src/hotspot/cpu/riscv/register_riscv.hpp | 337 + + src/hotspot/cpu/riscv/relocInfo_riscv.cpp | 113 + + src/hotspot/cpu/riscv/relocInfo_riscv.hpp | 45 + + src/hotspot/cpu/riscv/riscv.ad | 10685 ++++++++++++++++ + src/hotspot/cpu/riscv/riscv_b.ad | 605 + + src/hotspot/cpu/riscv/riscv_v.ad | 1723 +++ + src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp | 2738 ++++ + src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 3743 ++++++ + src/hotspot/cpu/riscv/stubRoutines_riscv.cpp | 60 + + src/hotspot/cpu/riscv/stubRoutines_riscv.hpp | 179 + + .../templateInterpreterGenerator_riscv.cpp | 1841 +++ + src/hotspot/cpu/riscv/templateTable_riscv.cpp | 4028 ++++++ + src/hotspot/cpu/riscv/templateTable_riscv.hpp | 42 + + src/hotspot/cpu/riscv/vmStructs_riscv.hpp | 43 + + .../cpu/riscv/vm_version_ext_riscv.cpp | 91 + + .../cpu/riscv/vm_version_ext_riscv.hpp | 55 + + src/hotspot/cpu/riscv/vm_version_riscv.cpp | 190 + + src/hotspot/cpu/riscv/vm_version_riscv.hpp | 65 + + src/hotspot/cpu/riscv/vmreg_riscv.cpp | 60 + + src/hotspot/cpu/riscv/vmreg_riscv.hpp | 64 + + src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp | 47 + + src/hotspot/cpu/riscv/vtableStubs_riscv.cpp | 260 + + src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp | 5 +- + src/hotspot/cpu/s390/s390.ad | 16 +- + src/hotspot/cpu/sparc/sparc.ad | 10 +- + src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp | 5 +- + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 93 + + src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 + + src/hotspot/cpu/x86/x86.ad | 14 +- + src/hotspot/cpu/x86/x86_32.ad | 19 +- + src/hotspot/cpu/x86/x86_64.ad | 24 +- + src/hotspot/os/linux/os_linux.cpp | 11 +- + .../os_cpu/linux_riscv/atomic_linux_riscv.hpp | 113 + + .../linux_riscv/bytes_linux_riscv.inline.hpp | 44 + + .../linux_riscv/copy_linux_riscv.inline.hpp | 116 + + .../linux_riscv/globals_linux_riscv.hpp | 43 + + .../linux_riscv/orderAccess_linux_riscv.hpp | 73 + + .../os_cpu/linux_riscv/os_linux_riscv.cpp | 628 + + .../os_cpu/linux_riscv/os_linux_riscv.hpp | 40 + + .../prefetch_linux_riscv.inline.hpp | 38 + + .../os_cpu/linux_riscv/thread_linux_riscv.cpp | 103 + + .../os_cpu/linux_riscv/thread_linux_riscv.hpp | 67 + + .../linux_riscv/vmStructs_linux_riscv.hpp | 55 + + .../linux_riscv/vm_version_linux_riscv.cpp | 116 + + src/hotspot/share/adlc/archDesc.cpp | 5 + + src/hotspot/share/adlc/formssel.cpp | 2 + + 
src/hotspot/share/c1/c1_LIR.cpp | 113 +- + src/hotspot/share/c1/c1_LIR.hpp | 208 +- + src/hotspot/share/c1/c1_LIRAssembler.cpp | 15 +- + src/hotspot/share/c1/c1_LIRAssembler.hpp | 4 +- + src/hotspot/share/c1/c1_LinearScan.cpp | 14 +- + src/hotspot/share/classfile/vmSymbols.cpp | 2 + + src/hotspot/share/classfile/vmSymbols.hpp | 1 + + .../gc/shenandoah/shenandoahArguments.cpp | 2 +- + .../share/jfr/utilities/jfrBigEndian.hpp | 2 +- + src/hotspot/share/opto/c2compiler.cpp | 1 + + src/hotspot/share/opto/chaitin.cpp | 90 +- + src/hotspot/share/opto/chaitin.hpp | 32 +- + src/hotspot/share/opto/intrinsicnode.hpp | 5 +- + src/hotspot/share/opto/library_call.cpp | 13 +- + src/hotspot/share/opto/machnode.cpp | 2 +- + src/hotspot/share/opto/machnode.hpp | 4 + + src/hotspot/share/opto/matcher.cpp | 41 +- + src/hotspot/share/opto/matcher.hpp | 6 +- + src/hotspot/share/opto/node.cpp | 21 + + src/hotspot/share/opto/node.hpp | 5 + + src/hotspot/share/opto/opcodes.cpp | 4 +- + src/hotspot/share/opto/opcodes.hpp | 2 + + src/hotspot/share/opto/phase.cpp | 2 + + src/hotspot/share/opto/phase.hpp | 1 + + src/hotspot/share/opto/postaloc.cpp | 53 +- + src/hotspot/share/opto/regmask.cpp | 46 +- + src/hotspot/share/opto/regmask.hpp | 10 +- + src/hotspot/share/opto/superword.cpp | 7 +- + src/hotspot/share/opto/type.cpp | 14 +- + src/hotspot/share/opto/type.hpp | 12 +- + src/hotspot/share/opto/vectornode.cpp | 4 +- + .../share/runtime/abstract_vm_version.cpp | 12 +- + src/hotspot/share/runtime/thread.hpp | 2 +- + src/hotspot/share/runtime/thread.inline.hpp | 2 +- + src/hotspot/share/utilities/debug.cpp | 1 + + src/hotspot/share/utilities/macros.hpp | 26 + + .../share/classes/java/lang/StringLatin1.java | 5 + + .../native/libsaproc/LinuxDebuggerLocal.c | 49 +- + .../linux/native/libsaproc/libproc.h | 2 + + .../linux/native/libsaproc/ps_proc.c | 4 + + .../classes/sun/jvm/hotspot/HotSpotAgent.java | 4 + + .../debugger/MachineDescriptionRISCV64.java | 40 + + .../debugger/linux/LinuxCDebugger.java | 11 +- + .../linux/riscv64/LinuxRISCV64CFrame.java | 90 + + .../riscv64/LinuxRISCV64ThreadContext.java | 48 + + .../debugger/proc/ProcDebuggerLocal.java | 6 + + .../proc/riscv64/ProcRISCV64Thread.java | 88 + + .../riscv64/ProcRISCV64ThreadContext.java | 48 + + .../riscv64/ProcRISCV64ThreadFactory.java | 46 + + .../remote/riscv64/RemoteRISCV64Thread.java | 55 + + .../riscv64/RemoteRISCV64ThreadContext.java | 48 + + .../riscv64/RemoteRISCV64ThreadFactory.java | 46 + + .../riscv64/RISCV64ThreadContext.java | 172 + + .../sun/jvm/hotspot/runtime/Threads.java | 3 + + .../LinuxRISCV64JavaThreadPDAccess.java | 132 + + .../riscv64/RISCV64CurrentFrameGuess.java | 223 + + .../hotspot/runtime/riscv64/RISCV64Frame.java | 554 + + .../riscv64/RISCV64JavaCallWrapper.java | 58 + + .../runtime/riscv64/RISCV64RegisterMap.java | 53 + + .../jvm/hotspot/utilities/PlatformInfo.java | 2 +- + src/utils/hsdis/hsdis.c | 6 +- + test/hotspot/jtreg/compiler/c2/TestBit.java | 6 +- + ...eSHA1IntrinsicsOptionOnUnsupportedCPU.java | 4 + + ...HA256IntrinsicsOptionOnUnsupportedCPU.java | 4 + + ...HA512IntrinsicsOptionOnUnsupportedCPU.java | 4 + + .../cli/TestUseSHAOptionOnUnsupportedCPU.java | 4 + + .../testcases/GenericTestCaseForOtherCPU.java | 10 +- + ...nericTestCaseForUnsupportedRISCV64CPU.java | 102 + + .../string/TestStringLatin1IndexOfChar.java | 153 + + .../loopopts/superword/ProdRed_Double.java | 2 +- + .../loopopts/superword/ProdRed_Float.java | 2 +- + .../loopopts/superword/ProdRed_Int.java | 2 +- + .../loopopts/superword/ReductionPerf.java | 2 +- + 
.../superword/SumRedAbsNeg_Double.java | 2 +- + .../superword/SumRedAbsNeg_Float.java | 2 +- + .../loopopts/superword/SumRedSqrt_Double.java | 2 +- + .../loopopts/superword/SumRed_Double.java | 2 +- + .../loopopts/superword/SumRed_Float.java | 2 +- + .../loopopts/superword/SumRed_Int.java | 2 +- + .../argumentcorruption/CheckLongArgs.java | 2 +- + .../criticalnatives/lookup/LookUp.java | 2 +- + .../sha/predicate/IntrinsicPredicates.java | 9 +- + .../NMT/CheckForProperDetailStackTrace.java | 3 +- + .../ReservedStack/ReservedStackTest.java | 3 +- + test/hotspot/jtreg/test_env.sh | 5 + + ...stMutuallyExclusivePlatformPredicates.java | 3 +- + .../nsk/jvmti/GetThreadInfo/thrinfo001.java | 2 +- + .../jdk/jfr/event/os/TestCPUInformation.java | 5 +- + test/lib/jdk/test/lib/Platform.java | 5 + + .../bench/java/lang/StringIndexOfChar.java | 221 + + 218 files changed, 57653 insertions(+), 221 deletions(-) + create mode 100644 src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/assembler_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/bytes_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_Defs_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LIR_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/c1_globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c2_globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/c2_init_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/codeBuffer_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/compiledIC_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/copy_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/depChecker_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/disassembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/frame_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/frame_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/frame_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp + create mode 100644 
src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad + create mode 100644 src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/globals_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/icBuffer_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/icache_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/icache_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/interp_masm_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/interpreterRT_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/jniTypes_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/methodHandles_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/nativeInst_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/registerMap_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/register_definitions_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/register_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/register_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/relocInfo_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/riscv.ad + create mode 100644 src/hotspot/cpu/riscv/riscv_b.ad + create mode 100644 src/hotspot/cpu/riscv/riscv_v.ad + create mode 100644 src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/stubRoutines_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/templateTable_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/vmStructs_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vm_version_riscv.hpp + create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.cpp + create mode 100644 src/hotspot/cpu/riscv/vmreg_riscv.hpp + create mode 100644 
src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp + create mode 100644 src/hotspot/cpu/riscv/vtableStubs_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp + create mode 100644 src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java + create mode 100644 src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java + create mode 100644 test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java + create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java + create mode 100644 test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java + +diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub +index 3c280ac7c..eda408e01 100644 +--- a/make/autoconf/build-aux/config.sub ++++ b/make/autoconf/build-aux/config.sub +@@ -48,6 +48,13 @@ if ! 
echo $* | grep '^aarch64-' >/dev/null ; then + exit + fi + ++# Canonicalize for riscv which autoconf-config.sub doesn't handle ++if echo $* | grep '^riscv\(32\|64\)-linux' > /dev/null ; then ++ result=`echo $@ | sed 's/linux/unknown-linux/'` ++ echo $result ++ exit ++fi ++ + while test $# -gt 0 ; do + case $1 in + -- ) # Stop option processing +diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +index a3e1e00b2..01ef26c10 100644 +--- a/make/autoconf/hotspot.m4 ++++ b/make/autoconf/hotspot.m4 +@@ -367,7 +367,8 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + AC_MSG_CHECKING([if shenandoah can be built]) + if HOTSPOT_CHECK_JVM_FEATURE(shenandoahgc); then + if test "x$OPENJDK_TARGET_CPU_ARCH" = "xx86" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xriscv64" ; then + AC_MSG_RESULT([yes]) + else + DISABLED_JVM_FEATURES="$DISABLED_JVM_FEATURES shenandoahgc" +diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 +index 16e906bdc..c01fdbcce 100644 +--- a/make/autoconf/libraries.m4 ++++ b/make/autoconf/libraries.m4 +@@ -110,7 +110,7 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], + GLOBAL_LIBS="" + fi + +- BASIC_JDKLIB_LIBS="" ++ BASIC_JDKLIB_LIBS="-latomic" + if test "x$TOOLCHAIN_TYPE" != xmicrosoft; then + BASIC_JDKLIB_LIBS="-ljava -ljvm" + fi +@@ -147,6 +147,8 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES], + wsock32.lib winmm.lib version.lib psapi.lib" + fi + ++ BASIC_JVM_LIBS="$BASIC_JVM_LIBS -latomic" ++ + JDKLIB_LIBS="$BASIC_JDKLIB_LIBS" + JDKEXE_LIBS="" + JVM_LIBS="$BASIC_JVM_LIBS" +diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +index f89b22f5f..48d615992 100644 +--- a/make/autoconf/platform.m4 ++++ b/make/autoconf/platform.m4 +@@ -120,6 +120,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; ++ riscv32) ++ VAR_CPU=riscv32 ++ VAR_CPU_ARCH=riscv ++ VAR_CPU_BITS=32 ++ VAR_CPU_ENDIAN=little ++ ;; + riscv64) + VAR_CPU=riscv64 + VAR_CPU_ARCH=riscv +@@ -564,8 +570,10 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=S390 + elif test "x$OPENJDK_$1_CPU" = xs390x; then + HOTSPOT_$1_CPU_DEFINE=S390 ++ elif test "x$OPENJDK_$1_CPU" = xriscv32; then ++ HOTSPOT_$1_CPU_DEFINE=RISCV32 + elif test "x$OPENJDK_$1_CPU" = xriscv64; then +- HOTSPOT_$1_CPU_DEFINE=RISCV ++ HOTSPOT_$1_CPU_DEFINE=RISCV64 + elif test "x$OPENJDK_$1_CPU" != x; then + HOTSPOT_$1_CPU_DEFINE=$(echo $OPENJDK_$1_CPU | tr a-z A-Z) + fi +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index c5a3ac572..9de6f663c 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -1,5 +1,5 @@ + # +-# Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. + # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ # + # This code is free software; you can redistribute it and/or modify it +@@ -150,6 +150,20 @@ ifeq ($(call check-jvm-feature, compiler2), true) + $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + ++ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) ++ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ ++ ))) ++ endif ++ ++ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv) ++ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_v.ad \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_b.ad \ ++ ))) ++ endif ++ + ifeq ($(call check-jvm-feature, shenandoahgc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ +diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad +index 1e4ee33a9..ac5d56f0f 100644 +--- a/src/hotspot/cpu/aarch64/aarch64.ad ++++ b/src/hotspot/cpu/aarch64/aarch64.ad +@@ -2062,15 +2062,17 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen +- bool ret_value = match_rule_supported(opcode); ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } + // Add rules here. + +- return ret_value; // Per default match rules are supported. ++ return true; // Per default match rules are supported. + } + + const bool Matcher::has_predicated_vectors(void) { +@@ -2129,6 +2131,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return size; + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // Vector ideal reg. 
+ const uint Matcher::vector_ideal_reg(int len) { + switch(len) { +@@ -15515,15 +15525,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, + ins_pipe(pipe_class_memory); + %} + +-instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, ++instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, + iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, rFlagsReg cr) + %{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(((StrIndexOfCharNode*)n) ->encoding() == StrIntrinsicNode::U); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, + TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + +- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, +@@ -15533,6 +15544,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, + ins_pipe(pipe_class_memory); + %} + ++instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, ++ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ ++ ins_encode %{ ++ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ + instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, + iRegI_R0 result, rFlagsReg cr) + %{ +diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +index fdd2c0ca3..1a35be210 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -1593,7 +1593,9 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + } + + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on aarch64"); + + Assembler::Condition acond, ncond; + switch (condition) { +diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +index 5753cc9a6..21c6fdf19 100644 +--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +@@ -4829,6 +4829,70 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, + BIND(DONE); + } + ++void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, Register tmp3) ++{ ++ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; ++ Register cnt1_neg = cnt1; ++ Register ch1 = rscratch1; ++ Register result_tmp = rscratch2; ++ ++ cbz(cnt1, NOMATCH); ++ ++ cmp(cnt1, (u1)8); ++ br(LT, DO1_SHORT); ++ ++ orr(ch, ch, ch, LSL, 8); ++ orr(ch, ch, ch, LSL, 16); ++ orr(ch, ch, ch, LSL, 32); ++ 
++ sub(cnt1, cnt1, 8); ++ mov(result_tmp, cnt1); ++ lea(str1, Address(str1, cnt1)); ++ sub(cnt1_neg, zr, cnt1); ++ ++ mov(tmp3, 0x0101010101010101); ++ ++ BIND(CH1_LOOP); ++ ldr(ch1, Address(str1, cnt1_neg)); ++ eor(ch1, ch, ch1); ++ sub(tmp1, ch1, tmp3); ++ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); ++ bics(tmp1, tmp1, tmp2); ++ br(NE, HAS_ZERO); ++ adds(cnt1_neg, cnt1_neg, 8); ++ br(LT, CH1_LOOP); ++ ++ cmp(cnt1_neg, (u1)8); ++ mov(cnt1_neg, 0); ++ br(LT, CH1_LOOP); ++ b(NOMATCH); ++ ++ BIND(HAS_ZERO); ++ rev(tmp1, tmp1); ++ clz(tmp1, tmp1); ++ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); ++ b(MATCH); ++ ++ BIND(DO1_SHORT); ++ mov(result_tmp, cnt1); ++ lea(str1, Address(str1, cnt1)); ++ sub(cnt1_neg, zr, cnt1); ++ BIND(DO1_LOOP); ++ ldrb(ch1, Address(str1, cnt1_neg)); ++ cmp(ch, ch1); ++ br(EQ, MATCH); ++ adds(cnt1_neg, cnt1_neg, 1); ++ br(LT, DO1_LOOP); ++ BIND(NOMATCH); ++ mov(result, -1); ++ b(DONE); ++ BIND(MATCH); ++ add(result, result_tmp, cnt1_neg); ++ BIND(DONE); ++} ++ + // Compare strings. + void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, +diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +index 7e23c16a4..c3d472a9a 100644 +--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +@@ -1260,6 +1260,9 @@ public: + void string_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3); ++ void stringL_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, Register tmp3); + void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, + FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, +diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad +index 51f2d9ce7..71f83521e 100644 +--- a/src/hotspot/cpu/arm/arm.ad ++++ b/src/hotspot/cpu/arm/arm.ad +@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. 
+ } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for +@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { + return MaxVectorSize; + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // Vector ideal reg corresponding to specified size in bytes + const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); +diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +index f0a7229aa..2d06d3d58 100644 +--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp ++++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +@@ -1824,7 +1824,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + } + + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on arm"); ++ + AsmCondition acond = al; + AsmCondition ncond = nv; + if (opr1 != opr2) { +diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +index 847f7d61d..d081116be 100644 +--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +@@ -1554,7 +1554,10 @@ inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) { + } + + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on ppc"); ++ + if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) { + load_to_reg(this, opr1, result); // Condition doesn't matter. + return; +diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad +index ebbe80a26..df66a46dc 100644 +--- a/src/hotspot/cpu/ppc/ppc.ad ++++ b/src/hotspot/cpu/ppc/ppc.ad +@@ -2242,15 +2242,17 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen +- bool ret_value = match_rule_supported(opcode); ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } + // Add rules here. + +- return ret_value; // Per default match rules are supported. ++ return true; // Per default match rules are supported. + } + + const bool Matcher::has_predicated_vectors(void) { +@@ -2310,6 +2312,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. 
+ } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // PPC implementation uses VSX load/store instructions (if + // SuperwordUseVSX) which support 4 byte but not arbitrary alignment + const bool Matcher::misaligned_vectors_ok() { +diff --git a/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp +new file mode 100644 +index 000000000..5661b7425 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/abstractInterpreter_riscv.cpp +@@ -0,0 +1,185 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/constMethod.hpp" ++#include "oops/method.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/align.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/macros.hpp" ++ ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : i = 4; break; ++ case T_LONG : i = 5; break; ++ case T_VOID : i = 6; break; ++ case T_FLOAT : i = 7; break; ++ case T_DOUBLE : i = 8; break; ++ case T_OBJECT : i = 9; break; ++ case T_ARRAY : i = 9; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved fp thru expr stack ++ // bottom). 
be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = frame::entry_frame_after_call_words; ++ assert_cond(method != NULL); ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return (overhead_size + method_stack + stub_code); ++} ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in TemplateInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params) + ++ monitors * frame::interpreter_frame_monitor_size() + ++ // On the top frame, at all times SP <= ESP, and SP is ++ // 16-aligned. We ensure this by adjusting SP on method ++ // entry and re-entry to allow room for the maximum size of ++ // the expression stack. When we call another method we bump ++ // SP so that no stack space is wasted. So, only on the top ++ // frame do we need to allow max_stack words. ++ (is_top_frame ? max_stack : temps + extra_args); ++ ++ // On riscv we always keep the stack pointer 16-aligned, so we ++ // must round up here. ++ size = align_up(size, 2); ++ ++ return size; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // The frame interpreter_frame is guaranteed to be the right size, ++ // as determined by a previous call to the size_activation() method. ++ // It is also guaranteed to be walkable even though it is in a ++ // skeletal state ++ ++ assert_cond(method != NULL && caller != NULL && interpreter_frame != NULL); ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp ++ // ++ // The interpreted method entry on riscv aligns SP to 16 bytes ++ // before generating the fixed part of the activation frame. So there ++ // may be a gap between the locals block and the saved sender SP. For ++ // an interpreted caller we need to recreate this gap and exactly ++ // align the incoming parameters with the caller's temporary ++ // expression stack. For other types of caller frame it doesn't ++ // matter. 
++ intptr_t* locals = NULL; ++ if (caller->is_interpreted_frame()) { ++ locals = caller->interpreter_frame_last_sp() + caller_actual_parameters - 1; ++ } else { ++ locals = interpreter_frame->sender_sp() + max_locals - 1; ++ } ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(monbot); ++ ++ // Set last_sp ++ intptr_t* last_sp = (intptr_t*) monbot - ++ tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(last_sp); ++ ++ // All frames but the initial (oldest) interpreter frame we fill in have ++ // a value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == ++ interpreter_frame->interpreter_frame_sender_sp()) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + ++ extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = ++ method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = ++ method->method_holder()->java_mirror(); ++} +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.cpp b/src/hotspot/cpu/riscv/assembler_riscv.cpp +new file mode 100644 +index 000000000..40ecf1a6c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/assembler_riscv.cpp +@@ -0,0 +1,365 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include <stdio.h> ++#include <sys/types.h> ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++#define __ _masm.
++ ++int AbstractAssembler::code_fill_byte() { ++ return 0; ++} ++ ++void Assembler::add(Register Rd, Register Rn, int64_t increment, Register temp) { ++ if (is_imm_in_range(increment, 12, 0)) { ++ addi(Rd, Rn, increment); ++ } else { ++ assert_different_registers(Rn, temp); ++ li(temp, increment); ++ add(Rd, Rn, temp); ++ } ++} ++ ++void Assembler::addw(Register Rd, Register Rn, int32_t increment, Register temp) { ++ if (is_imm_in_range(increment, 12, 0)) { ++ addiw(Rd, Rn, increment); ++ } else { ++ assert_different_registers(Rn, temp); ++ li(temp, increment); ++ addw(Rd, Rn, temp); ++ } ++} ++ ++void Assembler::sub(Register Rd, Register Rn, int64_t decrement, Register temp) { ++ if (is_imm_in_range(-decrement, 12, 0)) { ++ addi(Rd, Rn, -decrement); ++ } else { ++ assert_different_registers(Rn, temp); ++ li(temp, decrement); ++ sub(Rd, Rn, temp); ++ } ++} ++ ++void Assembler::subw(Register Rd, Register Rn, int32_t decrement, Register temp) { ++ if (is_imm_in_range(-decrement, 12, 0)) { ++ addiw(Rd, Rn, -decrement); ++ } else { ++ assert_different_registers(Rn, temp); ++ li(temp, decrement); ++ subw(Rd, Rn, temp); ++ } ++} ++ ++void Assembler::zext_w(Register Rd, Register Rs) { ++ add_uw(Rd, Rs, zr); ++} ++ ++void Assembler::li(Register Rd, int64_t imm) { ++ // int64_t is in range 0x8000 0000 0000 0000 ~ 0x7fff ffff ffff ffff ++ int shift = 12; ++ int64_t upper = imm, lower = imm; ++ // Split imm to a lower 12-bit sign-extended part and the remainder, because addi will sign-extend the lower imm. ++ lower = ((int32_t)imm << 20) >> 20; ++ upper -= lower; ++ ++ // Test whether imm is a 32-bit integer. ++ if (!(((imm) & ~(int64_t)0x7fffffff) == 0 || ++ (((imm) & ~(int64_t)0x7fffffff) == ~(int64_t)0x7fffffff))) { ++ while (((upper >> shift) & 1) == 0) { shift++; } ++ upper >>= shift; ++ li(Rd, upper); ++ slli(Rd, Rd, shift); ++ if (lower != 0) { ++ addi(Rd, Rd, lower); ++ } ++ } ++ else { ++ // 32-bit integer ++ Register hi_Rd = zr; ++ if (upper != 0) { ++ lui(Rd, (int32_t)upper); ++ hi_Rd = Rd; ++ } ++ if (lower != 0 || hi_Rd == zr) { ++ addiw(Rd, hi_Rd, lower); ++ } ++ } ++} ++ ++void Assembler::li64(Register Rd, int64_t imm) { ++ // Load upper 32 bits. Upper = imm[63:32], but if imm[31] = 1 or (imm[31:28] == 0x7ff && imm[19] == 1), ++ // upper = imm[63:32] + 1. ++ int64_t lower = imm & 0xffffffff; ++ lower -= ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)(imm & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ ++ // Load upper 32 bits ++ int64_t up = upper, lo = upper; ++ lo = (lo << 52) >> 52; ++ up -= lo; ++ up = (int32_t)up; ++ lui(Rd, up); ++ addi(Rd, Rd, lo); ++ ++ // Load the rest 32 bits. 
++ slli(Rd, Rd, 12); ++ addi(Rd, Rd, (int32_t)lower >> 20); ++ slli(Rd, Rd, 12); ++ lower = ((int32_t)imm << 12) >> 20; ++ addi(Rd, Rd, lower); ++ slli(Rd, Rd, 8); ++ lower = imm & 0xff; ++ addi(Rd, Rd, lower); ++} ++ ++void Assembler::li32(Register Rd, int32_t imm) { ++ // int32_t is in range 0x8000 0000 ~ 0x7fff ffff, and imm[31] is the sign bit ++ int64_t upper = imm, lower = imm; ++ lower = (imm << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ // lui Rd, imm[31:12] + imm[11] ++ lui(Rd, upper); ++ // use addiw to distinguish li32 to li64 ++ addiw(Rd, Rd, lower); ++} ++ ++#define INSN(NAME, REGISTER) \ ++ void Assembler::NAME(const address &dest, Register temp) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = dest - pc(); \ ++ if (is_imm_in_range(distance, 20, 1)) { \ ++ jal(REGISTER, distance); \ ++ } else { \ ++ assert(temp != noreg, "temp must not be empty register!"); \ ++ int32_t offset = 0; \ ++ movptr_with_offset(temp, dest, offset); \ ++ jalr(REGISTER, temp, offset); \ ++ } \ ++ } \ ++ void Assembler::NAME(Label &l, Register temp) { \ ++ jal(REGISTER, l, temp); \ ++ } \ ++ ++ INSN(j, x0); ++ INSN(jal, x1); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ ++ void Assembler::NAME(Register Rs) { \ ++ jalr(REGISTER, Rs, 0); \ ++ } ++ ++ INSN(jr, x0); ++ INSN(jalr, x1); ++ ++#undef INSN ++ ++void Assembler::ret() { ++ jalr(x0, x1, 0); ++} ++ ++#define INSN(NAME, REGISTER) \ ++ void Assembler::NAME(const address &dest, Register temp) { \ ++ assert_cond(dest != NULL); \ ++ assert(temp != noreg, "temp must not be empty register!"); \ ++ int64_t distance = dest - pc(); \ ++ if (is_offset_in_range(distance, 32)) { \ ++ auipc(temp, distance + 0x800); \ ++ jalr(REGISTER, temp, ((int32_t)distance << 20) >> 20); \ ++ } else { \ ++ int32_t offset = 0; \ ++ movptr_with_offset(temp, dest, offset); \ ++ jalr(REGISTER, temp, offset); \ ++ } \ ++ } ++ ++ INSN(call, x1); ++ INSN(tail, x0); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER) \ ++ void Assembler::NAME(const Address &adr, Register temp) { \ ++ switch(adr.getMode()) { \ ++ case Address::literal: { \ ++ code_section()->relocate(pc(), adr.rspec()); \ ++ NAME(adr.target(), temp); \ ++ break; \ ++ } \ ++ case Address::base_plus_offset: { \ ++ Address tmp_adr = form_address(adr.base(), adr.offset(), 12, temp); \ ++ jalr(REGISTER, tmp_adr.base(), tmp_adr.offset()); \ ++ break; \ ++ } \ ++ default: \ ++ ShouldNotReachHere(); \ ++ } \ ++ } ++ ++ INSN(j, x0); ++ INSN(jal, x1); ++ INSN(call, x1); ++ INSN(tail, x0); ++ ++#undef INSN ++ ++void Assembler::wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn, ++ compare_and_branch_label_insn neg_insn, bool is_far) { ++ if (is_far) { ++ Label done; ++ (this->*neg_insn)(r1, r2, done, /* is_far */ false); ++ j(L); ++ bind(done); ++ } else { ++ if (L.is_bound()) { ++ (this->*insn)(r1, r2, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(r1, r2, pc()); ++ } ++ } ++} ++ ++void Assembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) { ++ if (L.is_bound()) { ++ (this->*insn)(Rt, target(L), tmp); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(Rt, pc(), tmp); ++ } ++} ++ ++void Assembler::wrap_label(Register Rt, Label &L, jal_jalr_insn insn) { ++ if (L.is_bound()) { ++ (this->*insn)(Rt, target(L)); ++ } else { ++ L.add_patch_at(code(), locator()); ++ (this->*insn)(Rt, pc()); ++ } ++} ++ ++void Assembler::movptr_with_offset(Register Rd, address addr, int32_t &offset) { ++ uintptr_t imm64 
= (uintptr_t)addr; ++#ifndef PRODUCT ++ { ++ char buffer[64]; ++ snprintf(buffer, sizeof(buffer), "0x%" PRIx64, imm64); ++ block_comment(buffer); ++ } ++#endif ++ assert(is_unsigned_imm_in_range(imm64, 47, 0) || (imm64 == (uintptr_t)-1), ++ "bit 47 overflows in address constant"); ++ // Load upper 31 bits ++ int32_t imm = imm64 >> 17; ++ int64_t upper = imm, lower = imm; ++ lower = (lower << 52) >> 52; ++ upper -= lower; ++ upper = (int32_t)upper; ++ lui(Rd, upper); ++ addi(Rd, Rd, lower); ++ ++ // Load the rest 17 bits. ++ slli(Rd, Rd, 11); ++ addi(Rd, Rd, (imm64 >> 6) & 0x7ff); ++ slli(Rd, Rd, 6); ++ ++ // Here, remove the addi instruct and return the offset directly. This offset will be used by following jalr/ld. ++ offset = imm64 & 0x3f; ++} ++ ++void Assembler::movptr(Register Rd, uintptr_t imm64) { ++ movptr(Rd, (address)imm64); ++} ++ ++void Assembler::movptr(Register Rd, address addr) { ++ int offset = 0; ++ movptr_with_offset(Rd, addr, offset); ++ addi(Rd, Rd, offset); ++} ++ ++#define INSN(NAME, NEG_INSN) \ ++ void Assembler::NAME(Register Rs, Register Rt, const address &dest) { \ ++ NEG_INSN(Rt, Rs, dest); \ ++ } \ ++ void Assembler::NAME(Register Rs, Register Rt, Label &l, bool is_far) { \ ++ NEG_INSN(Rt, Rs, l, is_far); \ ++ } ++ ++ INSN(bgt, blt); ++ INSN(ble, bge); ++ INSN(bgtu, bltu); ++ INSN(bleu, bgeu); ++#undef INSN ++ ++#undef __ ++ ++Address::Address(address target, relocInfo::relocType rtype) : _base(noreg), _offset(0), _mode(literal) { ++ _target = target; ++ switch (rtype) { ++ case relocInfo::oop_type: ++ case relocInfo::metadata_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ break; ++ case relocInfo::external_word_type: ++ _rspec = external_word_Relocation::spec(target); ++ break; ++ case relocInfo::internal_word_type: ++ _rspec = internal_word_Relocation::spec(target); ++ break; ++ case relocInfo::opt_virtual_call_type: ++ _rspec = opt_virtual_call_Relocation::spec(); ++ break; ++ case relocInfo::static_call_type: ++ _rspec = static_call_Relocation::spec(); ++ break; ++ case relocInfo::runtime_call_type: ++ _rspec = runtime_call_Relocation::spec(); ++ break; ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ _rspec = Relocation::spec_simple(rtype); ++ break; ++ case relocInfo::none: ++ _rspec = RelocationHolder::none; ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp +new file mode 100644 +index 000000000..d4da30ed6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp +@@ -0,0 +1,2004 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_ASSEMBLER_RISCV_HPP ++#define CPU_RISCV_ASSEMBLER_RISCV_HPP ++ ++#include "asm/register.hpp" ++#include "assembler_riscv.inline.hpp" ++ ++#define XLEN 64 ++ ++// definitions of various symbolic names for machine registers ++ ++// First intercalls between C and Java which use 8 general registers ++// and 8 floating registers ++ ++class Argument { ++ public: ++ enum { ++ n_int_register_parameters_c = 8, // x10, x11, ... x17 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 8, // f10, f11, ... f17 (c_farg0, c_farg1, ... ) ++ ++ n_int_register_parameters_j = 8, // x11, ... x17, x10 (rj_rarg0, j_rarg1, ...) ++ n_float_register_parameters_j = 8 // f10, f11, ... f17 (j_farg0, j_farg1, ...) ++ }; ++}; ++ ++// function argument(caller-save registers) ++REGISTER_DECLARATION(Register, c_rarg0, x10); ++REGISTER_DECLARATION(Register, c_rarg1, x11); ++REGISTER_DECLARATION(Register, c_rarg2, x12); ++REGISTER_DECLARATION(Register, c_rarg3, x13); ++REGISTER_DECLARATION(Register, c_rarg4, x14); ++REGISTER_DECLARATION(Register, c_rarg5, x15); ++REGISTER_DECLARATION(Register, c_rarg6, x16); ++REGISTER_DECLARATION(Register, c_rarg7, x17); ++ ++REGISTER_DECLARATION(FloatRegister, c_farg0, f10); ++REGISTER_DECLARATION(FloatRegister, c_farg1, f11); ++REGISTER_DECLARATION(FloatRegister, c_farg2, f12); ++REGISTER_DECLARATION(FloatRegister, c_farg3, f13); ++REGISTER_DECLARATION(FloatRegister, c_farg4, f14); ++REGISTER_DECLARATION(FloatRegister, c_farg5, f15); ++REGISTER_DECLARATION(FloatRegister, c_farg6, f16); ++REGISTER_DECLARATION(FloatRegister, c_farg7, f17); ++ ++// java function register(caller-save registers) ++REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); ++REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); ++REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); ++REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); ++REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); ++REGISTER_DECLARATION(Register, j_rarg5, c_rarg6); ++REGISTER_DECLARATION(Register, j_rarg6, c_rarg7); ++REGISTER_DECLARATION(Register, j_rarg7, c_rarg0); ++ ++REGISTER_DECLARATION(FloatRegister, j_farg0, f10); ++REGISTER_DECLARATION(FloatRegister, j_farg1, f11); ++REGISTER_DECLARATION(FloatRegister, j_farg2, f12); ++REGISTER_DECLARATION(FloatRegister, j_farg3, f13); ++REGISTER_DECLARATION(FloatRegister, j_farg4, f14); ++REGISTER_DECLARATION(FloatRegister, j_farg5, f15); ++REGISTER_DECLARATION(FloatRegister, j_farg6, f16); ++REGISTER_DECLARATION(FloatRegister, j_farg7, f17); ++ ++// zero rigster ++REGISTER_DECLARATION(Register, zr, x0); ++// global pointer ++REGISTER_DECLARATION(Register, gp, x3); ++// thread pointer ++REGISTER_DECLARATION(Register, tp, x4); ++ ++// volatile (caller-save) registers ++ ++// current method -- must be in a call-clobbered register ++REGISTER_DECLARATION(Register, xmethod, x31); ++// return address ++REGISTER_DECLARATION(Register, ra, x1); ++ ++// non-volatile (callee-save) registers ++ ++// stack pointer ++REGISTER_DECLARATION(Register, sp, x2); 
++// frame pointer
++REGISTER_DECLARATION(Register, fp, x8);
++// base of heap
++REGISTER_DECLARATION(Register, xheapbase, x27);
++// constant pool cache
++REGISTER_DECLARATION(Register, xcpool, x26);
++// monitors allocated on stack
++REGISTER_DECLARATION(Register, xmonitors, x25);
++// locals on stack
++REGISTER_DECLARATION(Register, xlocals, x24);
++
++/* If x4 (tp) were used as the Java thread pointer, as suggested by the
++ * instruction manual, it would overlap with the register already used by
++ * the C++ thread.
++ */
++// java thread pointer
++REGISTER_DECLARATION(Register, xthread, x23);
++// bytecode pointer
++REGISTER_DECLARATION(Register, xbcp, x22);
++// Dispatch table base
++REGISTER_DECLARATION(Register, xdispatch, x21);
++// Java stack pointer
++REGISTER_DECLARATION(Register, esp, x20);
++
++// temporary registers (caller-save registers)
++REGISTER_DECLARATION(Register, t0, x5);
++REGISTER_DECLARATION(Register, t1, x6);
++REGISTER_DECLARATION(Register, t2, x7);
++
++const Register g_INTArgReg[Argument::n_int_register_parameters_c] = {
++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5, c_rarg6, c_rarg7
++};
++
++const FloatRegister g_FPArgReg[Argument::n_float_register_parameters_c] = {
++ c_farg0, c_farg1, c_farg2, c_farg3, c_farg4, c_farg5, c_farg6, c_farg7
++};
++
++#define assert_cond(ARG1) assert(ARG1, #ARG1)
++
++// Addressing modes
++class Address {
++ public:
++
++ enum mode { no_mode, base_plus_offset, pcrel, literal };
++
++ private:
++ Register _base;
++ int64_t _offset;
++ enum mode _mode;
++
++ RelocationHolder _rspec;
++
++ // If the target is far we'll need to load the ea of this to a
++ // register to reach it. Otherwise if near we can do PC-relative
++ // addressing.
++ address _target;
++
++ public:
++ Address()
++ : _base(noreg), _offset(0), _mode(no_mode), _target(NULL) { }
++ Address(Register r)
++ : _base(r), _offset(0), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, int o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, long o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, long long o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, unsigned int o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, unsigned long o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++ Address(Register r, unsigned long long o)
++ : _base(r), _offset(o), _mode(base_plus_offset), _target(NULL) { }
++#ifdef ASSERT
++ Address(Register r, ByteSize disp)
++ : _base(r), _offset(in_bytes(disp)), _mode(base_plus_offset), _target(NULL) { }
++#endif
++ Address(address target, RelocationHolder const& rspec)
++ : _base(noreg),
++ _offset(0),
++ _mode(literal),
++ _rspec(rspec),
++ _target(target) { }
++ Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
++
++ const Register base() const {
++ guarantee((_mode == base_plus_offset || _mode == pcrel || _mode == literal), "wrong mode");
++ return _base;
++ }
++ long offset() const {
++ return _offset;
++ }
++
++ mode getMode() const {
++ return _mode;
++ }
++
++ bool uses(Register reg) const { return _base == reg; }
++ const address target() const { return _target; }
++ const RelocationHolder& rspec() const { return _rspec; }
++ ~Address() {
++ _target = NULL;
++ _base = NULL;
++ }
++};
++
++// Convenience classes
++class RuntimeAddress: public Address {
++
++ public:
++
++ RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
++ ~RuntimeAddress() {}
++};
++
++class OopAddress: public Address {
++
++ public:
++
++ OopAddress(address target) : Address(target, relocInfo::oop_type) {}
++ ~OopAddress() {}
++};
++
++class ExternalAddress: public Address {
++ private:
++ static relocInfo::relocType reloc_for_target(address target) {
++ // Sometimes ExternalAddress is used for values which aren't
++ // exactly addresses, like the card table base.
++ // external_word_type can't be used for values in the first page
++ // so just skip the reloc in that case.
++ return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
++ }
++
++ public:
++
++ ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
++ ~ExternalAddress() {}
++};
++
++class InternalAddress: public Address {
++
++ public:
++
++ InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
++ ~InternalAddress() {}
++};
++
++class Assembler : public AbstractAssembler {
++public:
++
++ enum { instruction_size = 4 };
++
++ enum RoundingMode {
++ rne = 0b000, // round to Nearest, ties to Even
++ rtz = 0b001, // round towards Zero
++ rdn = 0b010, // round Down (towards negative infinity)
++ rup = 0b011, // round Up (towards infinity)
++ rmm = 0b100, // round to Nearest, ties to Max Magnitude
++ rdy = 0b111, // in an instruction's rm field, selects the dynamic rounding mode; in the Rounding Mode register, invalid.
++ };
++
++ Address form_address_complex(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) {
++ assert_different_registers(noreg, temp, base);
++ int64_t upper = offset, lower = offset;
++
++ int8_t shift = 64 - expect_offbits;
++ lower = (offset << shift) >> shift;
++ upper -= lower;
++
++ li(temp, upper);
++ add(temp, temp, base);
++ return Address(temp, lower);
++ }
++
++ Address form_address(Register base, int64_t offset, int8_t expect_offbits, Register temp = t0) {
++ if (is_offset_in_range(offset, expect_offbits)) {
++ return Address(base, offset);
++ }
++ return form_address_complex(base, offset, expect_offbits, temp);
++ }
++
++ void li(Register Rd, int64_t imm); // optimized load immediate
++ void li32(Register Rd, int32_t imm);
++ void li64(Register Rd, int64_t imm);
++ void movptr(Register Rd, address addr);
++ void movptr_with_offset(Register Rd, address addr, int32_t &offset);
++ void movptr(Register Rd, uintptr_t imm64);
++ void j(const address &dest, Register temp = t0);
++ void j(const Address &adr, Register temp = t0);
++ void j(Label &l, Register temp = t0);
++ void jal(Label &l, Register temp = t0);
++ void jal(const address &dest, Register temp = t0);
++ void jal(const Address &adr, Register temp = t0);
++ void jr(Register Rs);
++ void jalr(Register Rs);
++ void ret();
++ void call(const address &dest, Register temp = t0);
++ void call(const Address &adr, Register temp = t0);
++ void tail(const address &dest, Register temp = t0);
++ void tail(const Address &adr, Register temp = t0);
++ void call(Label &l, Register temp) {
++ call(target(l), temp);
++ }
++ void tail(Label &l, Register temp) {
++ tail(target(l), temp);
++ }
++
++ static inline uint32_t extract(uint32_t val, unsigned msb, unsigned lsb) {
++ assert_cond(msb >= lsb && msb <= 31);
++ unsigned nbits = msb - lsb + 1;
++ uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
++ uint32_t result = val >> lsb;
++ result &= mask;
++ return result;
++ }
++
++ static inline int32_t sextract(uint32_t val, unsigned msb, unsigned lsb) {
++ assert_cond(msb >= lsb && msb <= 31);
++ int32_t result = val << (31 - msb);
++ result >>= (31 - msb + lsb);
++ return result;
++ }
++
++ static void patch(address a, unsigned msb, unsigned lsb, unsigned val) {
++ assert_cond(a != NULL);
++ assert_cond(msb >= lsb && msb <= 31);
++ unsigned nbits = msb - lsb + 1;
++ guarantee(val < (1ULL << nbits), "Field too big for insn");
++ unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
++ val <<= lsb;
++ mask <<= lsb;
++ unsigned target = *(unsigned *)a;
++ target &= ~mask;
++ target |= val;
++ *(unsigned *)a = target;
++ }
++
++ static void patch(address a, unsigned bit, unsigned val) {
++ patch(a, bit, bit, val);
++ }
++
++ static void patch_reg(address a, unsigned lsb, Register reg) {
++ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
++ }
++
++ static void patch_reg(address a, unsigned lsb, FloatRegister reg) {
++ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
++ }
++
++ static void patch_reg(address a, unsigned lsb, VectorRegister reg) {
++ patch(a, lsb + 4, lsb, reg->encoding_nocheck());
++ }
++
++ void emit(unsigned insn) {
++ emit_int32((jint)insn);
++ }
++
++ void halt() {
++ emit_int32(0);
++ }
++
++// Register Instruction
++#define INSN(NAME, op, funct3, funct7) \
++ void NAME(Register Rd, Register Rs1, Register Rs2) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch((address)&insn, 31, 25, funct7); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ patch_reg((address)&insn, 20, Rs2); \
++ emit(insn); \
++ }
++
++ INSN(add, 0b0110011, 0b000, 0b0000000);
++ INSN(sub, 0b0110011, 0b000, 0b0100000);
++ INSN(andr, 0b0110011, 0b111, 0b0000000);
++ INSN(orr, 0b0110011, 0b110, 0b0000000);
++ INSN(xorr, 0b0110011, 0b100, 0b0000000);
++ INSN(sll, 0b0110011, 0b001, 0b0000000);
++ INSN(sra, 0b0110011, 0b101, 0b0100000);
++ INSN(srl, 0b0110011, 0b101, 0b0000000);
++ INSN(slt, 0b0110011, 0b010, 0b0000000);
++ INSN(sltu, 0b0110011, 0b011, 0b0000000);
++ INSN(addw, 0b0111011, 0b000, 0b0000000);
++ INSN(subw, 0b0111011, 0b000, 0b0100000);
++ INSN(sllw, 0b0111011, 0b001, 0b0000000);
++ INSN(sraw, 0b0111011, 0b101, 0b0100000);
++ INSN(srlw, 0b0111011, 0b101, 0b0000000);
++ INSN(mul, 0b0110011, 0b000, 0b0000001);
++ INSN(mulh, 0b0110011, 0b001, 0b0000001);
++ INSN(mulhsu, 0b0110011, 0b010, 0b0000001);
++ INSN(mulhu, 0b0110011, 0b011, 0b0000001);
++ INSN(mulw, 0b0111011, 0b000, 0b0000001);
++ INSN(div, 0b0110011, 0b100, 0b0000001);
++ INSN(divu, 0b0110011, 0b101, 0b0000001);
++ INSN(divw, 0b0111011, 0b100, 0b0000001);
++ INSN(divuw, 0b0111011, 0b101, 0b0000001);
++ INSN(rem, 0b0110011, 0b110, 0b0000001);
++ INSN(remu, 0b0110011, 0b111, 0b0000001);
++ INSN(remw, 0b0111011, 0b110, 0b0000001);
++ INSN(remuw, 0b0111011, 0b111, 0b0000001);
++
++ // Vector Configuration Instruction
++ INSN(vsetvl, 0b1010111, 0b111, 0b1000000);
++
++#undef INSN
++
++#define INSN_ENTRY_RELOC(result_type, header) \
++ result_type header { \
++ guarantee(rtype == relocInfo::internal_word_type, \
++ "only internal_word_type relocs make sense here"); \
++ code_section()->relocate(pc(), InternalAddress(dest).rspec());
++
++ // Load/store register (all modes)
++#define INSN(NAME, op, funct3) \
++ void NAME(Register Rd, Register Rs, const int32_t offset) { \
++ unsigned insn = 0; \
++ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \
++ int32_t val = offset & 0xfff; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ 
patch_reg((address)&insn, 15, Rs); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch((address)&insn, 31, 20, val); \ ++ emit(insn); \ ++ } \ ++ void NAME(Register Rd, address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = (dest - pc()); \ ++ if (is_offset_in_range(distance, 32)) { \ ++ auipc(Rd, (int32_t)distance + 0x800); \ ++ NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ ++ } else { \ ++ int32_t offset = 0; \ ++ movptr_with_offset(Rd, dest, offset); \ ++ NAME(Rd, Rd, offset); \ ++ } \ ++ } \ ++ INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ ++ NAME(Rd, dest); \ ++ } \ ++ void NAME(Register Rd, const Address &adr, Register temp = t0) { \ ++ switch(adr.getMode()) { \ ++ case Address::literal: { \ ++ code_section()->relocate(pc(), adr.rspec()); \ ++ NAME(Rd, adr.target()); \ ++ break; \ ++ } \ ++ case Address::base_plus_offset: { \ ++ if (is_offset_in_range(adr.offset(), 12)) { \ ++ NAME(Rd, adr.base(), adr.offset()); \ ++ } else { \ ++ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, Rd == adr.base() ? temp : Rd)); \ ++ } \ ++ break; \ ++ } \ ++ default: \ ++ ShouldNotReachHere(); \ ++ } \ ++ } \ ++ void NAME(Register Rd, Label &L) { \ ++ wrap_label(Rd, L, &Assembler::NAME); \ ++ } ++ ++ INSN(lb, 0b0000011, 0b000); ++ INSN(lbu, 0b0000011, 0b100); ++ INSN(ld, 0b0000011, 0b011); ++ INSN(lh, 0b0000011, 0b001); ++ INSN(lhu, 0b0000011, 0b101); ++ INSN(lw, 0b0000011, 0b010); ++ INSN(lwu, 0b0000011, 0b110); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3) \ ++ void NAME(FloatRegister Rd, Register Rs, const int32_t offset) { \ ++ unsigned insn = 0; \ ++ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ uint32_t val = offset & 0xfff; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 15, Rs); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch((address)&insn, 31, 20, val); \ ++ emit(insn); \ ++ } \ ++ void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ ++ assert_cond(dest != NULL); \ ++ int64_t distance = (dest - pc()); \ ++ if (is_offset_in_range(distance, 32)) { \ ++ auipc(temp, (int32_t)distance + 0x800); \ ++ NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ ++ } else { \ ++ int32_t offset = 0; \ ++ movptr_with_offset(temp, dest, offset); \ ++ NAME(Rd, temp, offset); \ ++ } \ ++ } \ ++ INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, relocInfo::relocType rtype, Register temp = t0)) \ ++ NAME(Rd, dest, temp); \ ++ } \ ++ void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ ++ switch(adr.getMode()) { \ ++ case Address::literal: { \ ++ code_section()->relocate(pc(), adr.rspec()); \ ++ NAME(Rd, adr.target(), temp); \ ++ break; \ ++ } \ ++ case Address::base_plus_offset: { \ ++ if (is_offset_in_range(adr.offset(), 12)) { \ ++ NAME(Rd, adr.base(), adr.offset()); \ ++ } else { \ ++ NAME(Rd, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ } \ ++ break; \ ++ } \ ++ default: \ ++ ShouldNotReachHere(); \ ++ } \ ++ } ++ ++ INSN(flw, 0b0000111, 0b010); ++ INSN(fld, 0b0000111, 0b011); ++#undef INSN ++ ++#define INSN(NAME, op, funct3) \ ++ void NAME(Register Rs1, Register Rs2, const int64_t offset) { \ ++ unsigned insn = 0; \ ++ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ uint32_t val = offset & 0x1fff; \ ++ uint32_t val11 = (val >> 11) & 0x1; \ ++ uint32_t val12 = (val >> 12) & 0x1; \ ++ uint32_t low = (val >> 1) & 0xf; \ ++ uint32_t high = (val >> 5) & 0x3f; \ ++ 
patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ patch((address)&insn, 7, val11); \ ++ patch((address)&insn, 11, 8, low); \ ++ patch((address)&insn, 30, 25, high); \ ++ patch((address)&insn, 31, val12); \ ++ emit(insn); \ ++ } \ ++ void NAME(Register Rs1, Register Rs2, const address dest) { \ ++ assert_cond(dest != NULL); \ ++ int64_t offset = (dest - pc()); \ ++ guarantee(is_imm_in_range(offset, 12, 1), "offset is invalid."); \ ++ NAME(Rs1, Rs2, offset); \ ++ } \ ++ INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ ++ NAME(Rs1, Rs2, dest); \ ++ } ++ ++ INSN(beq, 0b1100011, 0b000); ++ INSN(bge, 0b1100011, 0b101); ++ INSN(bgeu, 0b1100011, 0b111); ++ INSN(blt, 0b1100011, 0b100); ++ INSN(bltu, 0b1100011, 0b110); ++ INSN(bne, 0b1100011, 0b001); ++ ++#undef INSN ++ ++#define INSN(NAME, NEG_INSN) \ ++ void NAME(Register Rs1, Register Rs2, Label &L, bool is_far = false) { \ ++ wrap_label(Rs1, Rs2, L, &Assembler::NAME, &Assembler::NEG_INSN, is_far); \ ++ } ++ ++ INSN(beq, bne); ++ INSN(bne, beq); ++ INSN(blt, bge); ++ INSN(bge, blt); ++ INSN(bltu, bgeu); ++ INSN(bgeu, bltu); ++ ++#undef INSN ++ ++#define INSN(NAME, REGISTER, op, funct3) \ ++ void NAME(REGISTER Rs1, Register Rs2, const int32_t offset) { \ ++ unsigned insn = 0; \ ++ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ uint32_t val = offset & 0xfff; \ ++ uint32_t low = val & 0x1f; \ ++ uint32_t high = (val >> 5) & 0x7f; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 15, Rs2); \ ++ patch_reg((address)&insn, 20, Rs1); \ ++ patch((address)&insn, 11, 7, low); \ ++ patch((address)&insn, 31, 25, high); \ ++ emit(insn); \ ++ } \ ++ INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, relocInfo::relocType rtype, Register temp = t0)) \ ++ NAME(Rs, dest, temp); \ ++ } ++ ++ INSN(sb, Register, 0b0100011, 0b000); ++ INSN(sh, Register, 0b0100011, 0b001); ++ INSN(sw, Register, 0b0100011, 0b010); ++ INSN(sd, Register, 0b0100011, 0b011); ++ INSN(fsw, FloatRegister, 0b0100111, 0b010); ++ INSN(fsd, FloatRegister, 0b0100111, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME(Register Rs, address dest, Register temp = t0) { \ ++ assert_cond(dest != NULL); \ ++ assert_different_registers(Rs, temp); \ ++ int64_t distance = (dest - pc()); \ ++ if (is_offset_in_range(distance, 32)) { \ ++ auipc(temp, (int32_t)distance + 0x800); \ ++ NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ ++ } else { \ ++ int32_t offset = 0; \ ++ movptr_with_offset(temp, dest, offset); \ ++ NAME(Rs, temp, offset); \ ++ } \ ++ } \ ++ void NAME(Register Rs, const Address &adr, Register temp = t0) { \ ++ switch(adr.getMode()) { \ ++ case Address::literal: { \ ++ assert_different_registers(Rs, temp); \ ++ code_section()->relocate(pc(), adr.rspec()); \ ++ NAME(Rs, adr.target(), temp); \ ++ break; \ ++ } \ ++ case Address::base_plus_offset: { \ ++ if (is_offset_in_range(adr.offset(), 12)) { \ ++ NAME(Rs, adr.base(), adr.offset()); \ ++ } else { \ ++ assert_different_registers(Rs, temp); \ ++ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ } \ ++ break; \ ++ } \ ++ default: \ ++ ShouldNotReachHere(); \ ++ } \ ++ } ++ ++ INSN(sb); ++ INSN(sh); ++ INSN(sw); ++ INSN(sd); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ ++ assert_cond(dest != 
NULL); \ ++ int64_t distance = (dest - pc()); \ ++ if (is_offset_in_range(distance, 32)) { \ ++ auipc(temp, (int32_t)distance + 0x800); \ ++ NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ ++ } else { \ ++ int32_t offset = 0; \ ++ movptr_with_offset(temp, dest, offset); \ ++ NAME(Rs, temp, offset); \ ++ } \ ++ } \ ++ void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ ++ switch(adr.getMode()) { \ ++ case Address::literal: { \ ++ code_section()->relocate(pc(), adr.rspec()); \ ++ NAME(Rs, adr.target(), temp); \ ++ break; \ ++ } \ ++ case Address::base_plus_offset: { \ ++ if (is_offset_in_range(adr.offset(), 12)) { \ ++ NAME(Rs, adr.base(), adr.offset()); \ ++ } else { \ ++ NAME(Rs, form_address_complex(adr.base(), adr.offset(), 12, temp)); \ ++ } \ ++ break; \ ++ } \ ++ default: \ ++ ShouldNotReachHere(); \ ++ } \ ++ } ++ ++ INSN(fsw); ++ INSN(fsd); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3) \ ++ void NAME(Register Rd, const uint32_t csr, Register Rs1) { \ ++ guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch((address)&insn, 31, 20, csr); \ ++ emit(insn); \ ++ } ++ ++ INSN(csrrw, 0b1110011, 0b001); ++ INSN(csrrs, 0b1110011, 0b010); ++ INSN(csrrc, 0b1110011, 0b011); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3) \ ++ void NAME(Register Rd, const uint32_t csr, const uint32_t uimm) { \ ++ guarantee(is_unsigned_imm_in_range(csr, 12, 0), "csr is invalid"); \ ++ guarantee(is_unsigned_imm_in_range(uimm, 5, 0), "uimm is invalid"); \ ++ unsigned insn = 0; \ ++ uint32_t val = uimm & 0x1f; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch((address)&insn, 19, 15, val); \ ++ patch((address)&insn, 31, 20, csr); \ ++ emit(insn); \ ++ } ++ ++ INSN(csrrwi, 0b1110011, 0b101); ++ INSN(csrrsi, 0b1110011, 0b110); ++ INSN(csrrci, 0b1110011, 0b111); ++ ++#undef INSN ++ ++#define INSN(NAME, op) \ ++ void NAME(Register Rd, const int32_t offset) { \ ++ unsigned insn = 0; \ ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is invalid."); \ ++ patch((address)&insn, 6, 0, op); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff)); \ ++ patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1)); \ ++ patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff)); \ ++ patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1)); \ ++ emit(insn); \ ++ } \ ++ void NAME(Register Rd, const address dest, Register temp = t0) { \ ++ assert_cond(dest != NULL); \ ++ int64_t offset = dest - pc(); \ ++ if (is_imm_in_range(offset, 20, 1)) { \ ++ NAME(Rd, offset); \ ++ } else { \ ++ assert_different_registers(Rd, temp); \ ++ int32_t off = 0; \ ++ movptr_with_offset(temp, dest, off); \ ++ jalr(Rd, temp, off); \ ++ } \ ++ } \ ++ void NAME(Register Rd, Label &L, Register temp = t0) { \ ++ assert_different_registers(Rd, temp); \ ++ wrap_label(Rd, L, temp, &Assembler::NAME); \ ++ } ++ ++ INSN(jal, 0b1101111); ++ ++#undef INSN ++ ++#undef INSN_ENTRY_RELOC ++ ++#define INSN(NAME, op, funct) \ ++ void NAME(Register Rd, Register Rs, const int32_t offset) { \ ++ unsigned insn = 0; \ ++ guarantee(is_offset_in_range(offset, 12), "offset is invalid."); \ ++ patch((address)&insn, 6, 0, op); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch((address)&insn, 14, 
12, funct); \ ++ patch_reg((address)&insn, 15, Rs); \ ++ int32_t val = offset & 0xfff; \ ++ patch((address)&insn, 31, 20, val); \ ++ emit(insn); \ ++ } ++ ++ INSN(jalr, 0b1100111, 0b000); ++ ++#undef INSN ++ ++ enum barrier { ++ i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001, ++ ir = i | r, ow = o | w, iorw = i | o | r | w ++ }; ++ ++ void fence(const uint32_t predecessor, const uint32_t successor) { ++ unsigned insn = 0; ++ guarantee(predecessor < 16, "predecessor is invalid"); ++ guarantee(successor < 16, "successor is invalid"); ++ patch((address)&insn, 6, 0, 0b001111); ++ patch((address)&insn, 11, 7, 0b00000); ++ patch((address)&insn, 14, 12, 0b000); ++ patch((address)&insn, 19, 15, 0b00000); ++ patch((address)&insn, 23, 20, successor); ++ patch((address)&insn, 27, 24, predecessor); ++ patch((address)&insn, 31, 28, 0b0000); ++ emit(insn); ++ } ++ ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME() { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 11, 7, 0b00000); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 19, 15, 0b00000); \ ++ patch((address)&insn, 31, 20, funct7); \ ++ emit(insn); \ ++ } ++ ++ INSN(ecall, 0b1110011, 0b000, 0b000000000000); ++ INSN(ebreak, 0b1110011, 0b000, 0b000000000001); ++#undef INSN ++ ++enum Aqrl {relaxed = 0b00, rl = 0b01, aq = 0b10, aqrl = 0b11}; ++ ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = aqrl) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ patch((address)&insn, 31, 27, funct7); \ ++ patch((address)&insn, 26, 25, memory_order); \ ++ emit(insn); \ ++ } ++ ++ INSN(amoswap_w, 0b0101111, 0b010, 0b00001); ++ INSN(amoadd_w, 0b0101111, 0b010, 0b00000); ++ INSN(amoxor_w, 0b0101111, 0b010, 0b00100); ++ INSN(amoand_w, 0b0101111, 0b010, 0b01100); ++ INSN(amoor_w, 0b0101111, 0b010, 0b01000); ++ INSN(amomin_w, 0b0101111, 0b010, 0b10000); ++ INSN(amomax_w, 0b0101111, 0b010, 0b10100); ++ INSN(amominu_w, 0b0101111, 0b010, 0b11000); ++ INSN(amomaxu_w, 0b0101111, 0b010, 0b11100); ++ INSN(amoswap_d, 0b0101111, 0b011, 0b00001); ++ INSN(amoadd_d, 0b0101111, 0b011, 0b00000); ++ INSN(amoxor_d, 0b0101111, 0b011, 0b00100); ++ INSN(amoand_d, 0b0101111, 0b011, 0b01100); ++ INSN(amoor_d, 0b0101111, 0b011, 0b01000); ++ INSN(amomin_d, 0b0101111, 0b011, 0b10000); ++ INSN(amomax_d , 0b0101111, 0b011, 0b10100); ++ INSN(amominu_d, 0b0101111, 0b011, 0b11000); ++ INSN(amomaxu_d, 0b0101111, 0b011, 0b11100); ++#undef INSN ++ ++enum operand_size { int8, int16, int32, uint32, int64 }; ++ ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Aqrl memory_order = relaxed) { \ ++ unsigned insn = 0; \ ++ uint32_t val = memory_order & 0x3; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch((address)&insn, 25, 20, 0b00000); \ ++ patch((address)&insn, 31, 27, funct7); \ ++ patch((address)&insn, 26, 25, val); \ ++ emit(insn); \ ++ } ++ ++ INSN(lr_w, 0b0101111, 0b010, 0b00010); ++ INSN(lr_d, 0b0101111, 0b011, 0b00010); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2, Aqrl memory_order = relaxed) { \ ++ unsigned insn = 0; \ ++ uint32_t val = memory_order & 0x3; \ 
++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs2); \ ++ patch_reg((address)&insn, 20, Rs1); \ ++ patch((address)&insn, 31, 27, funct7); \ ++ patch((address)&insn, 26, 25, val); \ ++ emit(insn); \ ++ } ++ ++ INSN(sc_w, 0b0101111, 0b010, 0b00011); ++ INSN(sc_d, 0b0101111, 0b011, 0b00011); ++#undef INSN ++ ++#define INSN(NAME, op, funct5, funct7) \ ++ void NAME(FloatRegister Rd, FloatRegister Rs1, RoundingMode rm = rne) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, rm); \ ++ patch((address)&insn, 24, 20, funct5); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(fsqrt_s, 0b1010011, 0b00000, 0b0101100); ++ INSN(fsqrt_d, 0b1010011, 0b00000, 0b0101101); ++ INSN(fcvt_s_d, 0b1010011, 0b00001, 0b0100000); ++ INSN(fcvt_d_s, 0b1010011, 0b00000, 0b0100001); ++#undef INSN ++ ++// Immediate Instruction ++#define INSN(NAME, op, funct3) \ ++ void NAME(Register Rd, Register Rs1, int32_t imm) { \ ++ guarantee(is_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 20, imm & 0x00000fff); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(addi, 0b0010011, 0b000); ++ INSN(slti, 0b0010011, 0b010); ++ INSN(addiw, 0b0011011, 0b000); ++ INSN(and_imm12, 0b0010011, 0b111); ++ INSN(ori, 0b0010011, 0b110); ++ INSN(xori, 0b0010011, 0b100); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3) \ ++ void NAME(Register Rd, Register Rs1, uint32_t imm) { \ ++ guarantee(is_unsigned_imm_in_range(imm, 12, 0), "Immediate is out of validity"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn,6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 20, imm & 0x00000fff); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(sltiu, 0b0010011, 0b011); ++ ++#undef INSN ++ ++// Shift Immediate Instruction ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ guarantee(shamt <= 0x3f, "Shamt is invalid"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 25, 20, shamt); \ ++ patch((address)&insn, 31, 26, funct6); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(slli, 0b0010011, 0b001, 0b000000); ++ INSN(srai, 0b0010011, 0b101, 0b010000); ++ INSN(srli, 0b0010011, 0b101, 0b000000); ++ ++#undef INSN ++ ++// Shift Word Immediate Instruction ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) { \ ++ guarantee(shamt <= 0x1f, "Shamt is invalid"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 24, 20, shamt); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(slliw, 0b0011011, 0b001, 0b0000000); ++ INSN(sraiw, 0b0011011, 0b101, 0b0100000); ++ INSN(srliw, 0b0011011, 0b101, 0b0000000); ++ ++#undef INSN ++ ++// Upper Immediate Instruction ++#define 
INSN(NAME, op) \
++ void NAME(Register Rd, int32_t imm) { \
++ int32_t upperImm = imm >> 12; \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch_reg((address)&insn, 7, Rd); \
++ upperImm &= 0x000fffff; \
++ patch((address)&insn, 31, 12, upperImm); \
++ emit(insn); \
++ }
++
++ INSN(lui, 0b0110111);
++ INSN(auipc, 0b0010111);
++
++#undef INSN
++
++// Float and Double Register Instruction
++#define INSN(NAME, op, funct2) \
++ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, FloatRegister Rs3, RoundingMode rm = rne) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, rm); \
++ patch((address)&insn, 26, 25, funct2); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ patch_reg((address)&insn, 20, Rs2); \
++ patch_reg((address)&insn, 27, Rs3); \
++ emit(insn); \
++ }
++
++ INSN(fmadd_s, 0b1000011, 0b00);
++ INSN(fmsub_s, 0b1000111, 0b00);
++ INSN(fnmsub_s, 0b1001011, 0b00);
++ INSN(fnmadd_s, 0b1001111, 0b00);
++ INSN(fmadd_d, 0b1000011, 0b01);
++ INSN(fmsub_d, 0b1000111, 0b01);
++ INSN(fnmsub_d, 0b1001011, 0b01);
++ INSN(fnmadd_d, 0b1001111, 0b01);
++
++#undef INSN
++
++// Float and Double Register Instruction
++#define INSN(NAME, op, funct3, funct7) \
++ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch((address)&insn, 31, 25, funct7); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ patch_reg((address)&insn, 20, Rs2); \
++ emit(insn); \
++ }
++
++ INSN(fsgnj_s, 0b1010011, 0b000, 0b0010000);
++ INSN(fsgnjn_s, 0b1010011, 0b001, 0b0010000);
++ INSN(fsgnjx_s, 0b1010011, 0b010, 0b0010000);
++ INSN(fmin_s, 0b1010011, 0b000, 0b0010100);
++ INSN(fmax_s, 0b1010011, 0b001, 0b0010100);
++ INSN(fsgnj_d, 0b1010011, 0b000, 0b0010001);
++ INSN(fsgnjn_d, 0b1010011, 0b001, 0b0010001);
++ INSN(fsgnjx_d, 0b1010011, 0b010, 0b0010001);
++ INSN(fmin_d, 0b1010011, 0b000, 0b0010101);
++ INSN(fmax_d, 0b1010011, 0b001, 0b0010101);
++
++#undef INSN
++
++// Float and Double Register Arith Instruction
++#define INSN(NAME, op, funct3, funct7) \
++ void NAME(Register Rd, FloatRegister Rs1, FloatRegister Rs2) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch((address)&insn, 31, 25, funct7); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ patch_reg((address)&insn, 20, Rs2); \
++ emit(insn); \
++ }
++
++ INSN(feq_s, 0b1010011, 0b010, 0b1010000);
++ INSN(flt_s, 0b1010011, 0b001, 0b1010000);
++ INSN(fle_s, 0b1010011, 0b000, 0b1010000);
++ INSN(feq_d, 0b1010011, 0b010, 0b1010001);
++ INSN(fle_d, 0b1010011, 0b000, 0b1010001);
++ INSN(flt_d, 0b1010011, 0b001, 0b1010001);
++#undef INSN
++
++// Float and Double Arith Instruction
++#define INSN(NAME, op, funct7) \
++ void NAME(FloatRegister Rd, FloatRegister Rs1, FloatRegister Rs2, RoundingMode rm = rne) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, rm); \
++ patch((address)&insn, 31, 25, funct7); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ patch_reg((address)&insn, 20, Rs2); \
++ emit(insn); \
++ }
++
++ INSN(fadd_s, 0b1010011, 0b0000000);
++ INSN(fsub_s, 0b1010011, 0b0000100);
++ INSN(fmul_s, 0b1010011, 0b0001000);
++ INSN(fdiv_s, 0b1010011, 0b0001100);
++ INSN(fadd_d, 0b1010011, 0b0000001);
++ 
INSN(fsub_d, 0b1010011, 0b0000101); ++ INSN(fmul_d, 0b1010011, 0b0001001); ++ INSN(fdiv_d, 0b1010011, 0b0001101); ++ ++#undef INSN ++ ++// Whole Float and Double Conversion Instruction ++#define INSN(NAME, op, funct5, funct7) \ ++ void NAME(FloatRegister Rd, Register Rs1, RoundingMode rm = rne) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, rm); \ ++ patch((address)&insn, 24, 20, funct5); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(fcvt_s_w, 0b1010011, 0b00000, 0b1101000); ++ INSN(fcvt_s_wu, 0b1010011, 0b00001, 0b1101000); ++ INSN(fcvt_s_l, 0b1010011, 0b00010, 0b1101000); ++ INSN(fcvt_s_lu, 0b1010011, 0b00011, 0b1101000); ++ INSN(fcvt_d_w, 0b1010011, 0b00000, 0b1101001); ++ INSN(fcvt_d_wu, 0b1010011, 0b00001, 0b1101001); ++ INSN(fcvt_d_l, 0b1010011, 0b00010, 0b1101001); ++ INSN(fcvt_d_lu, 0b1010011, 0b00011, 0b1101001); ++ ++#undef INSN ++ ++// Float and Double Conversion Instruction ++#define INSN(NAME, op, funct5, funct7) \ ++ void NAME(Register Rd, FloatRegister Rs1, RoundingMode rm = rtz) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, rm); \ ++ patch((address)&insn, 24, 20, funct5); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(fcvt_w_s, 0b1010011, 0b00000, 0b1100000); ++ INSN(fcvt_l_s, 0b1010011, 0b00010, 0b1100000); ++ INSN(fcvt_wu_s, 0b1010011, 0b00001, 0b1100000); ++ INSN(fcvt_lu_s, 0b1010011, 0b00011, 0b1100000); ++ INSN(fcvt_w_d, 0b1010011, 0b00000, 0b1100001); ++ INSN(fcvt_wu_d, 0b1010011, 0b00001, 0b1100001); ++ INSN(fcvt_l_d, 0b1010011, 0b00010, 0b1100001); ++ INSN(fcvt_lu_d, 0b1010011, 0b00011, 0b1100001); ++ ++#undef INSN ++ ++// Float and Double Move Instruction ++#define INSN(NAME, op, funct3, funct5, funct7) \ ++ void NAME(FloatRegister Rd, Register Rs1) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 20, funct5); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(fmv_w_x, 0b1010011, 0b000, 0b00000, 0b1111000); ++ INSN(fmv_d_x, 0b1010011, 0b000, 0b00000, 0b1111001); ++ ++#undef INSN ++ ++// Float and Double Conversion Instruction ++#define INSN(NAME, op, funct3, funct5, funct7) \ ++ void NAME(Register Rd, FloatRegister Rs1) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 20, funct5); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(fclass_s, 0b1010011, 0b001, 0b00000, 0b1110000); ++ INSN(fclass_d, 0b1010011, 0b001, 0b00000, 0b1110001); ++ INSN(fmv_x_w, 0b1010011, 0b000, 0b00000, 0b1110000); ++ INSN(fmv_x_d, 0b1010011, 0b000, 0b00000, 0b1110001); ++ ++#undef INSN ++ ++// ========================== ++// RISC-V Vector Extension ++// ========================== ++enum SEW { ++ e8, ++ e16, ++ e32, ++ e64, ++ RESERVED, ++}; ++ ++enum LMUL { ++ mf8 = 0b101, ++ mf4 = 0b110, ++ mf2 = 0b111, ++ m1 = 0b000, ++ m2 = 0b001, ++ m4 = 0b010, ++ m8 = 0b011, ++}; ++ ++enum VMA { ++ mu, // undisturbed ++ ma, // agnostic ++}; ++ ++enum VTA { ++ tu, // undisturbed ++ ta, // agnostic 
++};
++
++static Assembler::SEW elembytes_to_sew(int ebytes) {
++ assert(ebytes > 0 && ebytes <= 8, "unsupported element size");
++ return (Assembler::SEW) exact_log2(ebytes);
++}
++
++static Assembler::SEW elemtype_to_sew(BasicType etype) {
++ return Assembler::elembytes_to_sew(type2aelembytes(etype));
++}
++
++#define patch_vtype(hsb, lsb, vlmul, vsew, vta, vma, vill) \
++ if (vill == 1) { \
++ guarantee(((vlmul | vsew | vta | vma) == 0), \
++ "the other bits in vtype shall be zero"); \
++ } \
++ patch((address)&insn, lsb + 2, lsb, vlmul); \
++ patch((address)&insn, lsb + 5, lsb + 3, vsew); \
++ patch((address)&insn, lsb + 6, vta); \
++ patch((address)&insn, lsb + 7, vma); \
++ patch((address)&insn, hsb - 1, lsb + 8, 0); \
++ patch((address)&insn, hsb, vill)
++
++#define INSN(NAME, op, funct3) \
++ void NAME(Register Rd, Register Rs1, SEW sew, LMUL lmul = m1, \
++ VMA vma = mu, VTA vta = tu, bool vill = false) { \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch_vtype(30, 20, lmul, sew, vta, vma, vill); \
++ patch((address)&insn, 31, 0); \
++ patch_reg((address)&insn, 7, Rd); \
++ patch_reg((address)&insn, 15, Rs1); \
++ emit(insn); \
++ }
++
++ INSN(vsetvli, 0b1010111, 0b111);
++
++#undef INSN
++
++#define INSN(NAME, op, funct3) \
++ void NAME(Register Rd, uint32_t imm, SEW sew, LMUL lmul = m1, \
++ VMA vma = mu, VTA vta = tu, bool vill = false) { \
++ unsigned insn = 0; \
++ guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch((address)&insn, 19, 15, imm); \
++ patch_vtype(29, 20, lmul, sew, vta, vma, vill); \
++ patch((address)&insn, 31, 30, 0b11); \
++ patch_reg((address)&insn, 7, Rd); \
++ emit(insn); \
++ }
++
++ INSN(vsetivli, 0b1010111, 0b111);
++
++#undef INSN
++
++#undef patch_vtype
++
++enum VectorMask {
++ v0_t = 0b0,
++ unmasked = 0b1
++};
++
++#define patch_VArith(op, Reg, funct3, Reg_or_Imm5, Vs2, vm, funct6) \
++ unsigned insn = 0; \
++ patch((address)&insn, 6, 0, op); \
++ patch((address)&insn, 14, 12, funct3); \
++ patch((address)&insn, 19, 15, Reg_or_Imm5); \
++ patch((address)&insn, 25, vm); \
++ patch((address)&insn, 31, 26, funct6); \
++ patch_reg((address)&insn, 7, Reg); \
++ patch_reg((address)&insn, 20, Vs2); \
++ emit(insn)
++
++// r2_vm
++#define INSN(NAME, op, funct3, Vs1, funct6) \
++ void NAME(Register Rd, VectorRegister Vs2, VectorMask vm = unmasked) { \
++ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \
++ }
++
++ // Vector Mask
++ INSN(vpopc_m, 0b1010111, 0b010, 0b10000, 0b010000);
++ INSN(vfirst_m, 0b1010111, 0b010, 0b10001, 0b010000);
++#undef INSN
++
++#define INSN(NAME, op, funct3, Vs1, funct6) \
++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorMask vm = unmasked) { \
++ patch_VArith(op, Vd, funct3, Vs1, Vs2, vm, funct6); \
++ }
++
++ // Vector Integer Extension
++ INSN(vzext_vf2, 0b1010111, 0b010, 0b00110, 0b010010);
++ INSN(vzext_vf4, 0b1010111, 0b010, 0b00100, 0b010010);
++ INSN(vzext_vf8, 0b1010111, 0b010, 0b00010, 0b010010);
++ INSN(vsext_vf2, 0b1010111, 0b010, 0b00111, 0b010010);
++ INSN(vsext_vf4, 0b1010111, 0b010, 0b00101, 0b010010);
++ INSN(vsext_vf8, 0b1010111, 0b010, 0b00011, 0b010010);
++
++ // Vector Mask
++ INSN(vmsbf_m, 0b1010111, 0b010, 0b00001, 0b010100);
++ INSN(vmsif_m, 0b1010111, 0b010, 0b00011, 0b010100);
++ INSN(vmsof_m, 0b1010111, 0b010, 0b00010, 0b010100);
++ INSN(viota_m, 0b1010111, 0b010, 0b10000, 0b010100);
++
++ // Vector Single-Width 
Floating-Point/Integer Type-Convert Instructions ++ INSN(vfcvt_xu_f_v, 0b1010111, 0b001, 0b00000, 0b010010); ++ INSN(vfcvt_x_f_v, 0b1010111, 0b001, 0b00001, 0b010010); ++ INSN(vfcvt_f_xu_v, 0b1010111, 0b001, 0b00010, 0b010010); ++ INSN(vfcvt_f_x_v, 0b1010111, 0b001, 0b00011, 0b010010); ++ INSN(vfcvt_rtz_xu_f_v, 0b1010111, 0b001, 0b00110, 0b010010); ++ INSN(vfcvt_rtz_x_f_v, 0b1010111, 0b001, 0b00111, 0b010010); ++ ++ // Vector Floating-Point Instruction ++ INSN(vfsqrt_v, 0b1010111, 0b001, 0b00000, 0b010011); ++ INSN(vfclass_v, 0b1010111, 0b001, 0b10000, 0b010011); ++ ++#undef INSN ++ ++// r2rd ++#define INSN(NAME, op, funct3, simm5, vm, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2) { \ ++ patch_VArith(op, Vd, funct3, simm5, Vs2, vm, funct6); \ ++ } ++ ++ // Vector Whole Vector Register Move ++ INSN(vmv1r_v, 0b1010111, 0b011, 0b00000, 0b1, 0b100111); ++ INSN(vmv2r_v, 0b1010111, 0b011, 0b00001, 0b1, 0b100111); ++ INSN(vmv4r_v, 0b1010111, 0b011, 0b00011, 0b1, 0b100111); ++ INSN(vmv8r_v, 0b1010111, 0b011, 0b00111, 0b1, 0b100111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ ++ void NAME(FloatRegister Rd, VectorRegister Vs2) { \ ++ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ ++ } ++ ++ // Vector Floating-Point Move Instruction ++ INSN(vfmv_f_s, 0b1010111, 0b001, 0b00000, 0b1, 0b010000); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs1, vm, funct6) \ ++ void NAME(Register Rd, VectorRegister Vs2) { \ ++ patch_VArith(op, Rd, funct3, Vs1, Vs2, vm, funct6); \ ++ } ++ ++ // Vector Integer Scalar Move Instructions ++ INSN(vmv_x_s, 0b1010111, 0b010, 0b00000, 0b1, 0b010000); ++ ++#undef INSN ++ ++// r_vm ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) { \ ++ guarantee(is_unsigned_imm_in_range(imm, 5, 0), "imm is invalid"); \ ++ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Single-Width Bit Shift Instructions ++ INSN(vsra_vi, 0b1010111, 0b011, 0b101001); ++ INSN(vsrl_vi, 0b1010111, 0b011, 0b101000); ++ INSN(vsll_vi, 0b1010111, 0b011, 0b100101); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions ++ INSN(vfnmsub_vv, 0b1010111, 0b001, 0b101011); ++ INSN(vfmsub_vv, 0b1010111, 0b001, 0b101010); ++ INSN(vfnmadd_vv, 0b1010111, 0b001, 0b101001); ++ INSN(vfmadd_vv, 0b1010111, 0b001, 0b101000); ++ INSN(vfnmsac_vv, 0b1010111, 0b001, 0b101111); ++ INSN(vfmsac_vv, 0b1010111, 0b001, 0b101110); ++ INSN(vfmacc_vv, 0b1010111, 0b001, 0b101100); ++ INSN(vfnmacc_vv, 0b1010111, 0b001, 0b101101); ++ ++ // Vector Single-Width Integer Multiply-Add Instructions ++ INSN(vnmsub_vv, 0b1010111, 0b010, 0b101011); ++ INSN(vmadd_vv, 0b1010111, 0b010, 0b101001); ++ INSN(vnmsac_vv, 0b1010111, 0b010, 0b101111); ++ INSN(vmacc_vv, 0b1010111, 0b010, 0b101101); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Single-Width Integer Multiply-Add Instructions ++ INSN(vnmsub_vx, 0b1010111, 0b110, 0b101011); ++ INSN(vmadd_vx, 0b1010111, 0b110, 0b101001); ++ INSN(vnmsac_vx, 0b1010111, 
0b110, 0b101111); ++ INSN(vmacc_vx, 0b1010111, 0b110, 0b101101); ++ ++ INSN(vrsub_vx, 0b1010111, 0b100, 0b000011); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, FloatRegister Rs1, VectorRegister Vs2, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Single-Width Floating-Point Fused Multiply-Add Instructions ++ INSN(vfnmsub_vf, 0b1010111, 0b101, 0b101011); ++ INSN(vfmsub_vf, 0b1010111, 0b101, 0b101010); ++ INSN(vfnmadd_vf, 0b1010111, 0b101, 0b101001); ++ INSN(vfmadd_vf, 0b1010111, 0b101, 0b101000); ++ INSN(vfnmsac_vf, 0b1010111, 0b101, 0b101111); ++ INSN(vfmsac_vf, 0b1010111, 0b101, 0b101110); ++ INSN(vfmacc_vf, 0b1010111, 0b101, 0b101100); ++ INSN(vfnmacc_vf, 0b1010111, 0b101, 0b101101); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Single-Width Floating-Point Reduction Instructions ++ INSN(vfredsum_vs, 0b1010111, 0b001, 0b000001); ++ INSN(vfredosum_vs, 0b1010111, 0b001, 0b000011); ++ INSN(vfredmin_vs, 0b1010111, 0b001, 0b000101); ++ INSN(vfredmax_vs, 0b1010111, 0b001, 0b000111); ++ ++ // Vector Single-Width Integer Reduction Instructions ++ INSN(vredsum_vs, 0b1010111, 0b010, 0b000000); ++ INSN(vredand_vs, 0b1010111, 0b010, 0b000001); ++ INSN(vredor_vs, 0b1010111, 0b010, 0b000010); ++ INSN(vredxor_vs, 0b1010111, 0b010, 0b000011); ++ INSN(vredminu_vs, 0b1010111, 0b010, 0b000100); ++ INSN(vredmin_vs, 0b1010111, 0b010, 0b000101); ++ INSN(vredmaxu_vs, 0b1010111, 0b010, 0b000110); ++ INSN(vredmax_vs, 0b1010111, 0b010, 0b000111); ++ ++ // Vector Floating-Point Compare Instructions ++ INSN(vmfle_vv, 0b1010111, 0b001, 0b011001); ++ INSN(vmflt_vv, 0b1010111, 0b001, 0b011011); ++ INSN(vmfne_vv, 0b1010111, 0b001, 0b011100); ++ INSN(vmfeq_vv, 0b1010111, 0b001, 0b011000); ++ ++ // Vector Floating-Point Sign-Injection Instructions ++ INSN(vfsgnjx_vv, 0b1010111, 0b001, 0b001010); ++ INSN(vfsgnjn_vv, 0b1010111, 0b001, 0b001001); ++ INSN(vfsgnj_vv, 0b1010111, 0b001, 0b001000); ++ ++ // Vector Floating-Point MIN/MAX Instructions ++ INSN(vfmax_vv, 0b1010111, 0b001, 0b000110); ++ INSN(vfmin_vv, 0b1010111, 0b001, 0b000100); ++ ++ // Vector Single-Width Floating-Point Multiply/Divide Instructions ++ INSN(vfdiv_vv, 0b1010111, 0b001, 0b100000); ++ INSN(vfmul_vv, 0b1010111, 0b001, 0b100100); ++ ++ // Vector Single-Width Floating-Point Add/Subtract Instructions ++ INSN(vfsub_vv, 0b1010111, 0b001, 0b000010); ++ INSN(vfadd_vv, 0b1010111, 0b001, 0b000000); ++ ++ // Vector Single-Width Fractional Multiply with Rounding and Saturation ++ INSN(vsmul_vv, 0b1010111, 0b000, 0b100111); ++ ++ // Vector Integer Divide Instructions ++ INSN(vrem_vv, 0b1010111, 0b010, 0b100011); ++ INSN(vremu_vv, 0b1010111, 0b010, 0b100010); ++ INSN(vdiv_vv, 0b1010111, 0b010, 0b100001); ++ INSN(vdivu_vv, 0b1010111, 0b010, 0b100000); ++ ++ // Vector Single-Width Integer Multiply Instructions ++ INSN(vmulhsu_vv, 0b1010111, 0b010, 0b100110); ++ INSN(vmulhu_vv, 0b1010111, 0b010, 0b100100); ++ INSN(vmulh_vv, 0b1010111, 0b010, 0b100111); ++ INSN(vmul_vv, 0b1010111, 0b010, 0b100101); ++ ++ // Vector Integer Min/Max Instructions ++ INSN(vmax_vv, 0b1010111, 0b000, 0b000111); ++ INSN(vmaxu_vv, 0b1010111, 0b000, 0b000110); ++ INSN(vmin_vv, 0b1010111, 0b000, 0b000101); ++ INSN(vminu_vv, 0b1010111, 0b000, 0b000100); ++ 
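++ // All of the vector-vector (_vv) emitters in this group share the OP-V major
++ // opcode 0b1010111; funct3 selects the operand category and funct6 the
++ // operation, with the encoding assembled by patch_VArith above.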
++ // Vector Integer Comparison Instructions ++ INSN(vmsle_vv, 0b1010111, 0b000, 0b011101); ++ INSN(vmsleu_vv, 0b1010111, 0b000, 0b011100); ++ INSN(vmslt_vv, 0b1010111, 0b000, 0b011011); ++ INSN(vmsltu_vv, 0b1010111, 0b000, 0b011010); ++ INSN(vmsne_vv, 0b1010111, 0b000, 0b011001); ++ INSN(vmseq_vv, 0b1010111, 0b000, 0b011000); ++ ++ // Vector Single-Width Bit Shift Instructions ++ INSN(vsra_vv, 0b1010111, 0b000, 0b101001); ++ INSN(vsrl_vv, 0b1010111, 0b000, 0b101000); ++ INSN(vsll_vv, 0b1010111, 0b000, 0b100101); ++ ++ // Vector Bitwise Logical Instructions ++ INSN(vxor_vv, 0b1010111, 0b000, 0b001011); ++ INSN(vor_vv, 0b1010111, 0b000, 0b001010); ++ INSN(vand_vv, 0b1010111, 0b000, 0b001001); ++ ++ // Vector Single-Width Integer Add and Subtract ++ INSN(vsub_vv, 0b1010111, 0b000, 0b000010); ++ INSN(vadd_vv, 0b1010111, 0b000, 0b000000); ++ ++#undef INSN ++ ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, Register Rs1, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Integer Divide Instructions ++ INSN(vrem_vx, 0b1010111, 0b110, 0b100011); ++ INSN(vremu_vx, 0b1010111, 0b110, 0b100010); ++ INSN(vdiv_vx, 0b1010111, 0b110, 0b100001); ++ INSN(vdivu_vx, 0b1010111, 0b110, 0b100000); ++ ++ // Vector Single-Width Integer Multiply Instructions ++ INSN(vmulhsu_vx, 0b1010111, 0b110, 0b100110); ++ INSN(vmulhu_vx, 0b1010111, 0b110, 0b100100); ++ INSN(vmulh_vx, 0b1010111, 0b110, 0b100111); ++ INSN(vmul_vx, 0b1010111, 0b110, 0b100101); ++ ++ // Vector Integer Min/Max Instructions ++ INSN(vmax_vx, 0b1010111, 0b100, 0b000111); ++ INSN(vmaxu_vx, 0b1010111, 0b100, 0b000110); ++ INSN(vmin_vx, 0b1010111, 0b100, 0b000101); ++ INSN(vminu_vx, 0b1010111, 0b100, 0b000100); ++ ++ // Vector Integer Comparison Instructions ++ INSN(vmsgt_vx, 0b1010111, 0b100, 0b011111); ++ INSN(vmsgtu_vx, 0b1010111, 0b100, 0b011110); ++ INSN(vmsle_vx, 0b1010111, 0b100, 0b011101); ++ INSN(vmsleu_vx, 0b1010111, 0b100, 0b011100); ++ INSN(vmslt_vx, 0b1010111, 0b100, 0b011011); ++ INSN(vmsltu_vx, 0b1010111, 0b100, 0b011010); ++ INSN(vmsne_vx, 0b1010111, 0b100, 0b011001); ++ INSN(vmseq_vx, 0b1010111, 0b100, 0b011000); ++ ++ // Vector Narrowing Integer Right Shift Instructions ++ INSN(vnsra_wx, 0b1010111, 0b100, 0b101101); ++ INSN(vnsrl_wx, 0b1010111, 0b100, 0b101100); ++ ++ // Vector Single-Width Bit Shift Instructions ++ INSN(vsra_vx, 0b1010111, 0b100, 0b101001); ++ INSN(vsrl_vx, 0b1010111, 0b100, 0b101000); ++ INSN(vsll_vx, 0b1010111, 0b100, 0b100101); ++ ++ // Vector Bitwise Logical Instructions ++ INSN(vxor_vx, 0b1010111, 0b100, 0b001011); ++ INSN(vor_vx, 0b1010111, 0b100, 0b001010); ++ INSN(vand_vx, 0b1010111, 0b100, 0b001001); ++ ++ // Vector Single-Width Integer Add and Subtract ++ INSN(vsub_vx, 0b1010111, 0b100, 0b000010); ++ INSN(vadd_vx, 0b1010111, 0b100, 0b000000); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, FloatRegister Rs1, VectorMask vm = unmasked) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Floating-Point Compare Instructions ++ INSN(vmfge_vf, 0b1010111, 0b101, 0b011111); ++ INSN(vmfgt_vf, 0b1010111, 0b101, 0b011101); ++ INSN(vmfle_vf, 0b1010111, 0b101, 0b011001); ++ INSN(vmflt_vf, 0b1010111, 0b101, 0b011011); ++ INSN(vmfne_vf, 0b1010111, 0b101, 0b011100); ++ INSN(vmfeq_vf, 0b1010111, 0b101, 0b011000); ++ ++ // Vector Floating-Point Sign-Injection Instructions ++ 
INSN(vfsgnjx_vf, 0b1010111, 0b101, 0b001010); ++ INSN(vfsgnjn_vf, 0b1010111, 0b101, 0b001001); ++ INSN(vfsgnj_vf, 0b1010111, 0b101, 0b001000); ++ ++ // Vector Floating-Point MIN/MAX Instructions ++ INSN(vfmax_vf, 0b1010111, 0b101, 0b000110); ++ INSN(vfmin_vf, 0b1010111, 0b101, 0b000100); ++ ++ // Vector Single-Width Floating-Point Multiply/Divide Instructions ++ INSN(vfdiv_vf, 0b1010111, 0b101, 0b100000); ++ INSN(vfmul_vf, 0b1010111, 0b101, 0b100100); ++ INSN(vfrdiv_vf, 0b1010111, 0b101, 0b100001); ++ ++ // Vector Single-Width Floating-Point Add/Subtract Instructions ++ INSN(vfsub_vf, 0b1010111, 0b101, 0b000010); ++ INSN(vfadd_vf, 0b1010111, 0b101, 0b000000); ++ INSN(vfrsub_vf, 0b1010111, 0b101, 0b100111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, int32_t imm, VectorMask vm = unmasked) { \ ++ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ ++ patch_VArith(op, Vd, funct3, (uint32_t)imm & 0x1f, Vs2, vm, funct6); \ ++ } ++ ++ INSN(vmsgt_vi, 0b1010111, 0b011, 0b011111); ++ INSN(vmsgtu_vi, 0b1010111, 0b011, 0b011110); ++ INSN(vmsle_vi, 0b1010111, 0b011, 0b011101); ++ INSN(vmsleu_vi, 0b1010111, 0b011, 0b011100); ++ INSN(vmsne_vi, 0b1010111, 0b011, 0b011001); ++ INSN(vmseq_vi, 0b1010111, 0b011, 0b011000); ++ INSN(vxor_vi, 0b1010111, 0b011, 0b001011); ++ INSN(vor_vi, 0b1010111, 0b011, 0b001010); ++ INSN(vand_vi, 0b1010111, 0b011, 0b001001); ++ INSN(vadd_vi, 0b1010111, 0b011, 0b000000); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(VectorRegister Vd, int32_t imm, VectorRegister Vs2, VectorMask vm = unmasked) { \ ++ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ ++ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ ++ } ++ ++ INSN(vrsub_vi, 0b1010111, 0b011, 0b000011); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, vm, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs2, VectorRegister Vs1) { \ ++ patch_VArith(op, Vd, funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Compress Instruction ++ INSN(vcompress_vm, 0b1010111, 0b010, 0b1, 0b010111); ++ ++ // Vector Mask-Register Logical Instructions ++ INSN(vmxnor_mm, 0b1010111, 0b010, 0b1, 0b011111); ++ INSN(vmornot_mm, 0b1010111, 0b010, 0b1, 0b011100); ++ INSN(vmnor_mm, 0b1010111, 0b010, 0b1, 0b011110); ++ INSN(vmor_mm, 0b1010111, 0b010, 0b1, 0b011010); ++ INSN(vmxor_mm, 0b1010111, 0b010, 0b1, 0b011011); ++ INSN(vmandnot_mm, 0b1010111, 0b010, 0b1, 0b011000); ++ INSN(vmnand_mm, 0b1010111, 0b010, 0b1, 0b011101); ++ INSN(vmand_mm, 0b1010111, 0b010, 0b1, 0b011001); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ ++ void NAME(VectorRegister Vd, int32_t imm) { \ ++ guarantee(is_imm_in_range(imm, 5, 0), "imm is invalid"); \ ++ patch_VArith(op, Vd, funct3, (uint32_t)(imm & 0x1f), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Integer Move Instructions ++ INSN(vmv_v_i, 0b1010111, 0b011, v0, 0b1, 0b010111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ ++ void NAME(VectorRegister Vd, FloatRegister Rs1) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Floating-Point Scalar Move Instructions ++ INSN(vfmv_s_f, 0b1010111, 0b101, v0, 0b1, 0b010000); ++ // Vector Floating-Point Move Instruction ++ INSN(vfmv_v_f, 0b1010111, 0b101, v0, 0b1, 0b010111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ ++ void NAME(VectorRegister Vd, VectorRegister Vs1) { \ ++ patch_VArith(op, Vd, 
funct3, Vs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Vector Integer Move Instructions ++ INSN(vmv_v_v, 0b1010111, 0b000, v0, 0b1, 0b010111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, Vs2, vm, funct6) \ ++ void NAME(VectorRegister Vd, Register Rs1) { \ ++ patch_VArith(op, Vd, funct3, Rs1->encoding_nocheck(), Vs2, vm, funct6); \ ++ } ++ ++ // Integer Scalar Move Instructions ++ INSN(vmv_s_x, 0b1010111, 0b110, v0, 0b1, 0b010000); ++ ++ // Vector Integer Move Instructions ++ INSN(vmv_v_x, 0b1010111, 0b100, v0, 0b1, 0b010111); ++ ++#undef INSN ++#undef patch_VArith ++ ++#define INSN(NAME, op, funct13, funct6) \ ++ void NAME(VectorRegister Vd, VectorMask vm = unmasked) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 24, 12, funct13); \ ++ patch((address)&insn, 25, vm); \ ++ patch((address)&insn, 31, 26, funct6); \ ++ patch_reg((address)&insn, 7, Vd); \ ++ emit(insn); \ ++ } ++ ++ // Vector Element Index Instruction ++ INSN(vid_v, 0b1010111, 0b0000010001010, 0b010100); ++ ++#undef INSN ++ ++enum Nf { ++ g1 = 0b000, ++ g2 = 0b001, ++ g3 = 0b010, ++ g4 = 0b011, ++ g5 = 0b100, ++ g6 = 0b101, ++ g7 = 0b110, ++ g8 = 0b111 ++}; ++ ++#define patch_VLdSt(op, VReg, width, Rs1, Reg_or_umop, vm, mop, mew, nf) \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, width); \ ++ patch((address)&insn, 24, 20, Reg_or_umop); \ ++ patch((address)&insn, 25, vm); \ ++ patch((address)&insn, 27, 26, mop); \ ++ patch((address)&insn, 28, mew); \ ++ patch((address)&insn, 31, 29, nf); \ ++ patch_reg((address)&insn, 7, VReg); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn) ++ ++#define INSN(NAME, op, lumop, vm, mop, nf) \ ++ void NAME(VectorRegister Vd, Register Rs1, uint32_t width = 0, bool mew = false) { \ ++ guarantee(is_unsigned_imm_in_range(width, 3, 0), "width is invalid"); \ ++ patch_VLdSt(op, Vd, width, Rs1, lumop, vm, mop, mew, nf); \ ++ } ++ ++ // Vector Load/Store Instructions ++ INSN(vl1r_v, 0b0000111, 0b01000, 0b1, 0b00, g1); ++ ++#undef INSN ++ ++#define INSN(NAME, op, width, sumop, vm, mop, mew, nf) \ ++ void NAME(VectorRegister Vs3, Register Rs1) { \ ++ patch_VLdSt(op, Vs3, width, Rs1, sumop, vm, mop, mew, nf); \ ++ } ++ ++ // Vector Load/Store Instructions ++ INSN(vs1r_v, 0b0100111, 0b000, 0b01000, 0b1, 0b00, 0b0, g1); ++ ++#undef INSN ++ ++// r2_nfvm ++#define INSN(NAME, op, width, umop, mop, mew) \ ++ void NAME(VectorRegister Vd_or_Vs3, Register Rs1, Nf nf = g1) { \ ++ patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, 1, mop, mew, nf); \ ++ } ++ ++ // Vector Unit-Stride Instructions ++ INSN(vle1_v, 0b0000111, 0b000, 0b01011, 0b00, 0b0); ++ INSN(vse1_v, 0b0100111, 0b000, 0b01011, 0b00, 0b0); ++ ++#undef INSN ++ ++#define INSN(NAME, op, width, umop, mop, mew) \ ++ void NAME(VectorRegister Vd_or_Vs3, Register Rs1, VectorMask vm = unmasked, Nf nf = g1) { \ ++ patch_VLdSt(op, Vd_or_Vs3, width, Rs1, umop, vm, mop, mew, nf); \ ++ } ++ ++ // Vector Unit-Stride Instructions ++ INSN(vle8_v, 0b0000111, 0b000, 0b00000, 0b00, 0b0); ++ INSN(vle16_v, 0b0000111, 0b101, 0b00000, 0b00, 0b0); ++ INSN(vle32_v, 0b0000111, 0b110, 0b00000, 0b00, 0b0); ++ INSN(vle64_v, 0b0000111, 0b111, 0b00000, 0b00, 0b0); ++ ++ // Vector unit-stride fault-only-first Instructions ++ INSN(vle8ff_v, 0b0000111, 0b000, 0b10000, 0b00, 0b0); ++ INSN(vle16ff_v, 0b0000111, 0b101, 0b10000, 0b00, 0b0); ++ INSN(vle32ff_v, 0b0000111, 0b110, 0b10000, 0b00, 0b0); ++ INSN(vle64ff_v, 0b0000111, 0b111, 0b10000, 0b00, 0b0); ++ ++ INSN(vse8_v, 
0b0100111, 0b000, 0b00000, 0b00, 0b0); ++ INSN(vse16_v, 0b0100111, 0b101, 0b00000, 0b00, 0b0); ++ INSN(vse32_v, 0b0100111, 0b110, 0b00000, 0b00, 0b0); ++ INSN(vse64_v, 0b0100111, 0b111, 0b00000, 0b00, 0b0); ++ ++#undef INSN ++ ++#define INSN(NAME, op, width, mop, mew) \ ++ void NAME(VectorRegister Vd, Register Rs1, VectorRegister Vs2, VectorMask vm = unmasked, Nf nf = g1) { \ ++ patch_VLdSt(op, Vd, width, Rs1, Vs2->encoding_nocheck(), vm, mop, mew, nf); \ ++ } ++ ++ // Vector unordered indexed load instructions ++ INSN(vluxei8_v, 0b0000111, 0b000, 0b01, 0b0); ++ INSN(vluxei16_v, 0b0000111, 0b101, 0b01, 0b0); ++ INSN(vluxei32_v, 0b0000111, 0b110, 0b01, 0b0); ++ INSN(vluxei64_v, 0b0000111, 0b111, 0b01, 0b0); ++ ++ // Vector ordered indexed load instructions ++ INSN(vloxei8_v, 0b0000111, 0b000, 0b11, 0b0); ++ INSN(vloxei16_v, 0b0000111, 0b101, 0b11, 0b0); ++ INSN(vloxei32_v, 0b0000111, 0b110, 0b11, 0b0); ++ INSN(vloxei64_v, 0b0000111, 0b111, 0b11, 0b0); ++#undef INSN ++ ++#define INSN(NAME, op, width, mop, mew) \ ++ void NAME(VectorRegister Vd, Register Rs1, Register Rs2, VectorMask vm = unmasked, Nf nf = g1) { \ ++ patch_VLdSt(op, Vd, width, Rs1, Rs2->encoding_nocheck(), vm, mop, mew, nf); \ ++ } ++ ++ // Vector Strided Instructions ++ INSN(vlse8_v, 0b0000111, 0b000, 0b10, 0b0); ++ INSN(vlse16_v, 0b0000111, 0b101, 0b10, 0b0); ++ INSN(vlse32_v, 0b0000111, 0b110, 0b10, 0b0); ++ INSN(vlse64_v, 0b0000111, 0b111, 0b10, 0b0); ++ ++#undef INSN ++#undef patch_VLdSt ++ ++// ==================================== ++// RISC-V Bit-Manipulation Extension ++// Currently only support Zba and Zbb. ++// ==================================== ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, Register Rs2) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ patch_reg((address)&insn, 20, Rs2); \ ++ emit(insn); \ ++ } ++ ++ INSN(add_uw, 0b0111011, 0b000, 0b0000100); ++ INSN(rol, 0b0110011, 0b001, 0b0110000); ++ INSN(rolw, 0b0111011, 0b001, 0b0110000); ++ INSN(ror, 0b0110011, 0b101, 0b0110000); ++ INSN(rorw, 0b0111011, 0b101, 0b0110000); ++ INSN(sh1add, 0b0110011, 0b010, 0b0010000); ++ INSN(sh2add, 0b0110011, 0b100, 0b0010000); ++ INSN(sh3add, 0b0110011, 0b110, 0b0010000); ++ INSN(sh1add_uw, 0b0111011, 0b010, 0b0010000); ++ INSN(sh2add_uw, 0b0111011, 0b100, 0b0010000); ++ INSN(sh3add_uw, 0b0111011, 0b110, 0b0010000); ++ INSN(andn, 0b0110011, 0b111, 0b0100000); ++ INSN(orn, 0b0110011, 0b110, 0b0100000); ++ INSN(xnor, 0b0110011, 0b100, 0b0100000); ++ INSN(max, 0b0110011, 0b110, 0b0000101); ++ INSN(maxu, 0b0110011, 0b111, 0b0000101); ++ INSN(min, 0b0110011, 0b100, 0b0000101); ++ INSN(minu, 0b0110011, 0b101, 0b0000101); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct12) \ ++ void NAME(Register Rd, Register Rs1) { \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 31, 20, funct12); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(rev8, 0b0010011, 0b101, 0b011010111000); ++ INSN(sext_b, 0b0010011, 0b001, 0b011000000100); ++ INSN(sext_h, 0b0010011, 0b001, 0b011000000101); ++ INSN(zext_h, 0b0111011, 0b100, 0b000010000000); ++ INSN(clz, 0b0010011, 0b001, 0b011000000000); ++ INSN(clzw, 0b0011011, 0b001, 0b011000000000); ++ INSN(ctz, 0b0010011, 0b001, 
0b011000000001); ++ INSN(ctzw, 0b0011011, 0b001, 0b011000000001); ++ INSN(cpop, 0b0010011, 0b001, 0b011000000010); ++ INSN(cpopw, 0b0011011, 0b001, 0b011000000010); ++ INSN(orc_b, 0b0010011, 0b101, 0b001010000111); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct6) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt) {\ ++ guarantee(shamt <= 0x3f, "Shamt is invalid"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 25, 20, shamt); \ ++ patch((address)&insn, 31, 26, funct6); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(rori, 0b0010011, 0b101, 0b011000); ++ INSN(slli_uw, 0b0011011, 0b001, 0b000010); ++ ++#undef INSN ++ ++#define INSN(NAME, op, funct3, funct7) \ ++ void NAME(Register Rd, Register Rs1, unsigned shamt){ \ ++ guarantee(shamt <= 0x1f, "Shamt is invalid"); \ ++ unsigned insn = 0; \ ++ patch((address)&insn, 6, 0, op); \ ++ patch((address)&insn, 14, 12, funct3); \ ++ patch((address)&insn, 24, 20, shamt); \ ++ patch((address)&insn, 31, 25, funct7); \ ++ patch_reg((address)&insn, 7, Rd); \ ++ patch_reg((address)&insn, 15, Rs1); \ ++ emit(insn); \ ++ } ++ ++ INSN(roriw, 0b0011011, 0b101, 0b0110000); ++ ++#undef INSN ++ ++ void bgt(Register Rs, Register Rt, const address &dest); ++ void ble(Register Rs, Register Rt, const address &dest); ++ void bgtu(Register Rs, Register Rt, const address &dest); ++ void bleu(Register Rs, Register Rt, const address &dest); ++ void bgt(Register Rs, Register Rt, Label &l, bool is_far = false); ++ void ble(Register Rs, Register Rt, Label &l, bool is_far = false); ++ void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false); ++ void bleu(Register Rs, Register Rt, Label &l, bool is_far = false); ++ ++ typedef void (Assembler::* jal_jalr_insn)(Register Rt, address dest); ++ typedef void (Assembler::* load_insn_by_temp)(Register Rt, address dest, Register temp); ++ typedef void (Assembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest); ++ typedef void (Assembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far); ++ ++ void wrap_label(Register r1, Register r2, Label &L, compare_and_branch_insn insn, ++ compare_and_branch_label_insn neg_insn, bool is_far); ++ void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn); ++ void wrap_label(Register r, Label &L, jal_jalr_insn insn); ++ ++ // Computational pseudo instructions ++ void add(Register Rd, Register Rn, int64_t increment, Register temp = t0); ++ void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); ++ ++ void sub(Register Rd, Register Rn, int64_t decrement, Register temp = t0); ++ void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); ++ ++ // RVB pseudo instructions ++ // zero extend word ++ void zext_w(Register Rd, Register Rs); ++ ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++ } ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ ++ // Stack overflow checking ++ virtual void bang_stack_with_offset(int offset) { Unimplemented(); } ++ ++ static bool operand_valid_for_add_immediate(long imm) { ++ return is_imm_in_range(imm, 12, 0); ++ } ++ ++ // The maximum range of a branch is fixed for the riscv ++ // architecture. 
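++ // Background note (base RISC-V ISA, not specific to this port): a conditional
++ // branch instruction reaches roughly +/-4 KiB and a direct jal roughly +/-1 MiB;
++ // the 1 * M constant below corresponds to the jal reach, so more distant
++ // targets have to be reached indirectly, e.g. via an auipc + jalr pair.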
++ static const unsigned long branch_range = 1 * M; ++ ++ static bool reachable_from_branch_at(address branch, address target) { ++ return uabs(target - branch) < branch_range; ++ } ++ ++ static Assembler::SEW elemBytes_to_sew(int esize) { ++ assert(esize > 0 && esize <= 64 && is_power_of_2(esize), "unsupported element size"); ++ return (Assembler::SEW) exact_log2(esize); ++ } ++ ++ virtual ~Assembler() {} ++ ++}; ++ ++class BiasedLockingCounters; ++ ++#endif // CPU_RISCV_ASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp +new file mode 100644 +index 000000000..82b825db7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/assembler_riscv.inline.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++inline bool is_imm_in_range(long value, unsigned bits, unsigned align_bits) { ++ intx sign_bits = (value >> (bits + align_bits - 1)); ++ return ((value & right_n_bits(align_bits)) == 0) && ((sign_bits == 0) || (sign_bits == -1)); ++} ++ ++inline bool is_unsigned_imm_in_range(intx value, unsigned bits, unsigned align_bits) { ++ return (value >= 0) && ((value & right_n_bits(align_bits)) == 0) && ((value >> (align_bits + bits)) == 0); ++} ++ ++inline bool is_offset_in_range(intx offset, unsigned bits) { ++ return is_imm_in_range(offset, bits, 0); ++} ++ ++#endif // CPU_RISCV_ASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/bytes_riscv.hpp b/src/hotspot/cpu/riscv/bytes_riscv.hpp +new file mode 100644 +index 000000000..d0ac7ef46 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/bytes_riscv.hpp +@@ -0,0 +1,169 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2016 SAP SE. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_BYTES_RISCV_HPP ++#define CPU_RISCV_BYTES_RISCV_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // RISCV needs to check for alignment. ++ ++ // Forward declarations of the compiler-dependent implementation ++ static inline u2 swap_u2(u2 x); ++ static inline u4 swap_u4(u4 x); ++ static inline u8 swap_u8(u8 x); ++ ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t(p) & 1) == 0) { ++ return *(u2*)p; ++ } else { ++ return ((u2)(p[1]) << 8) | ++ ((u2)(p[0])); ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ switch (intptr_t(p) & 3) { ++ case 0: ++ return *(u4*)p; ++ ++ case 2: ++ return ((u4)(((u2*)p)[1]) << 16) | ++ ((u4)(((u2*)p)[0])); ++ ++ default: ++ return ((u4)(p[3]) << 24) | ++ ((u4)(p[2]) << 16) | ++ ((u4)(p[1]) << 8) | ++ ((u4)(p[0])); ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ switch (intptr_t(p) & 7) { ++ case 0: ++ return *(u8*)p; ++ ++ case 4: ++ return ((u8)(((u4*)p)[1]) << 32) | ++ ((u8)(((u4*)p)[0])); ++ ++ case 2: ++ case 6: ++ return ((u8)(((u2*)p)[3]) << 48) | ++ ((u8)(((u2*)p)[2]) << 32) | ++ ((u8)(((u2*)p)[1]) << 16) | ++ ((u8)(((u2*)p)[0])); ++ ++ default: ++ return ((u8)(p[7]) << 56) | ++ ((u8)(p[6]) << 48) | ++ ((u8)(p[5]) << 40) | ++ ((u8)(p[4]) << 32) | ++ ((u8)(p[3]) << 24) | ++ ((u8)(p[2]) << 16) | ++ ((u8)(p[1]) << 8) | ++ (u8)(p[0]); ++ } ++ } ++ ++ static inline void put_native_u2(address p, u2 x) { ++ if ((intptr_t(p) & 1) == 0) { ++ *(u2*)p = x; ++ } else { ++ p[1] = x >> 8; ++ p[0] = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ switch (intptr_t(p) & 3) { ++ case 0: ++ *(u4*)p = x; ++ break; ++ ++ case 2: ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ switch (intptr_t(p) & 7) { ++ case 0: ++ *(u8*)p = x; ++ break; ++ ++ case 4: ++ ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ++ case 6: ++ ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ++ ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. 
big-endian ordering) ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++}; ++ ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_RISCV_BYTES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +new file mode 100644 +index 000000000..522eedd29 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_CodeStubs_riscv.cpp +@@ -0,0 +1,352 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "nativeInst_riscv.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++ ++#define __ ce->masm()-> ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) ++{ ++ __ bind(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(t0, m); ++ ce->store_parameter(t0, 1); ++ ce->store_parameter(_bci, 0); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ j(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo *info, LIR_Opr index, LIR_Opr array) ++ : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) ++{ ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _index(index), _array(NULL), _throw_index_out_of_bounds_exception(true) ++{ ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) ++{ ++ __ bind(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ mv(t0, _index->as_register()); ++ } else { ++ __ mv(t0, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ mv(t1, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ int32_t off = 0; ++ __ la_patchable(ra, RuntimeAddress(Runtime1::entry_for(stub_id)), off); ++ __ jalr(ra, ra, off); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) ++{ ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) ++{ ++ __ bind(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) ++{ ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ bind(_entry); ++ __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++// Implementation of NewInstanceStub ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) ++{ ++ _result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, 
++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ __ mv(x13, _klass_reg->as_register()); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == x10, "result must in x10"); ++ __ j(_continuation); ++} ++ ++// Implementation of NewTypeArrayStub ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) ++{ ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == x9, "length must in x9"); ++ assert(_klass_reg->as_register() == x13, "klass_reg must in x13"); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == x10, "result must in x10"); ++ __ j(_continuation); ++} ++ ++// Implementation of NewObjectArrayStub ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) ++{ ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ assert(_length->as_register() == x9, "length must in x9"); ++ assert(_klass_reg->as_register() == x13, "klass_reg must in x13"); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == x10, "result must in x10"); ++ __ j(_continuation); ++} ++ ++// Implementation of MonitorAccessStubs ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++: MonitorAccessStub(obj_reg, lock_reg) ++{ ++ _info = new CodeEmitInfo(info); ++} ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ bind(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ j(_continuation); ++} ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) ++{ ++ __ bind(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ la(ra, _continuation); ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); ++} ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void 
PatchingStub::align_patch_site(MacroAssembler* masm) {} ++ ++// RISCV does not use C1 runtime patching. When a patch is needed, just deoptimize. ++void PatchingStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(false, "RISCV should not use C1 runtime patching"); ++} ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) ++{ ++ __ bind(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) ++{ ++ address a = NULL; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ bind(_entry); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) ++{ ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ bind(_entry); ++ // pass the object in a tmp register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ mv(t0, _obj->as_register()); ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) ++{ ++ // ---------------slow case: call to native----------------- ++ __ bind(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that.
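++ // (For reference: the five arguments set up below are src, src_pos, dst,
++ // dst_pos and length, laid out according to the Java calling convention
++ // computed from the signature array that follows.)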
++ // ++ const int args_num = 5; ++ VMRegPair args[args_num]; ++ BasicType signature[args_num] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT }; ++ SharedRuntime::java_calling_convention(signature, args, args_num, true); ++ ++ // push parameters ++ Register r[args_num]; ++ int i = 0; ++ r[i++] = src()->as_register(); ++ r[i++] = src_pos()->as_register(); ++ r[i++] = dst()->as_register(); ++ r[i++] = dst_pos()->as_register(); ++ r[i++] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int j = 0; j < args_num; j++) { ++ VMReg r_1 = args[j].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ sd(r[j], Address(sp, st_off)); ++ } else { ++ assert(r[j] == args[j].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ Address resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ __ la(t1, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ incrementw(Address(t1)); ++#endif ++ ++ __ j(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp +new file mode 100644 +index 000000000..a0f411352 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_DEFS_RISCV_HPP ++#define CPU_RISCV_C1_DEFS_RISCV_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of float registers used during code emission ++ ++ // caller saved ++ pd_nof_caller_save_cpu_regs_frame_map = 13, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of float registers killed by calls ++ ++ pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, ++ pd_last_callee_saved_reg = 21, ++ ++ pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, ++ ++ pd_nof_cpu_regs_reg_alloc ++ = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 32, // number of float registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of float registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, ++ ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg_1 = 8 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg_1 = 9 + pd_first_fpu_reg, ++ pd_first_callee_saved_fpu_reg_2 = 18 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg_2 = 27 + pd_first_fpu_reg ++}; ++ ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// RISCV where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_RISCV_C1_DEFS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp +new file mode 100644 +index 000000000..d4876625c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on RISCV +diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp +new file mode 100644 +index 000000000..4b43bc4d7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP ++#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP ++ ++// No FPU stack on RISCV ++class FpuStackSim; ++ ++#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +new file mode 100644 +index 000000000..94b4e0f0b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.cpp +@@ -0,0 +1,391 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) ++{ ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. ++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg1 = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg1, "must be same register"); ++ opr = as_long_opr(reg1); ++ } else if (type == T_OBJECT || type == T_ARRAY) { ++ opr = as_oop_opr(reg1); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg1); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg1); ++ } else { ++ opr = as_opr(reg1); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::zr_opr; ++LIR_Opr FrameMap::r1_opr; ++LIR_Opr FrameMap::r2_opr; ++LIR_Opr FrameMap::r3_opr; ++LIR_Opr FrameMap::r4_opr; ++LIR_Opr FrameMap::r5_opr; ++LIR_Opr FrameMap::r6_opr; ++LIR_Opr FrameMap::r7_opr; ++LIR_Opr FrameMap::r8_opr; ++LIR_Opr FrameMap::r9_opr; ++LIR_Opr FrameMap::r10_opr; ++LIR_Opr FrameMap::r11_opr; ++LIR_Opr FrameMap::r12_opr; ++LIR_Opr FrameMap::r13_opr; ++LIR_Opr FrameMap::r14_opr; ++LIR_Opr FrameMap::r15_opr; ++LIR_Opr FrameMap::r16_opr; ++LIR_Opr FrameMap::r17_opr; ++LIR_Opr FrameMap::r18_opr; ++LIR_Opr FrameMap::r19_opr; ++LIR_Opr FrameMap::r20_opr; ++LIR_Opr FrameMap::r21_opr; ++LIR_Opr FrameMap::r22_opr; ++LIR_Opr FrameMap::r23_opr; ++LIR_Opr FrameMap::r24_opr; ++LIR_Opr FrameMap::r25_opr; ++LIR_Opr FrameMap::r26_opr; ++LIR_Opr FrameMap::r27_opr; ++LIR_Opr FrameMap::r28_opr; ++LIR_Opr FrameMap::r29_opr; ++LIR_Opr FrameMap::r30_opr; ++LIR_Opr FrameMap::r31_opr; ++ ++LIR_Opr FrameMap::fp_opr; ++LIR_Opr FrameMap::sp_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::zr_oop_opr; ++LIR_Opr FrameMap::r1_oop_opr; ++LIR_Opr FrameMap::r2_oop_opr; ++LIR_Opr FrameMap::r3_oop_opr; ++LIR_Opr FrameMap::r4_oop_opr; ++LIR_Opr FrameMap::r5_oop_opr; ++LIR_Opr FrameMap::r6_oop_opr; ++LIR_Opr FrameMap::r7_oop_opr; ++LIR_Opr FrameMap::r8_oop_opr; ++LIR_Opr FrameMap::r9_oop_opr; ++LIR_Opr FrameMap::r10_oop_opr; ++LIR_Opr FrameMap::r11_oop_opr; ++LIR_Opr FrameMap::r12_oop_opr; ++LIR_Opr FrameMap::r13_oop_opr; ++LIR_Opr FrameMap::r14_oop_opr; ++LIR_Opr FrameMap::r15_oop_opr; ++LIR_Opr FrameMap::r16_oop_opr; 
++LIR_Opr FrameMap::r17_oop_opr; ++LIR_Opr FrameMap::r18_oop_opr; ++LIR_Opr FrameMap::r19_oop_opr; ++LIR_Opr FrameMap::r20_oop_opr; ++LIR_Opr FrameMap::r21_oop_opr; ++LIR_Opr FrameMap::r22_oop_opr; ++LIR_Opr FrameMap::r23_oop_opr; ++LIR_Opr FrameMap::r24_oop_opr; ++LIR_Opr FrameMap::r25_oop_opr; ++LIR_Opr FrameMap::r26_oop_opr; ++LIR_Opr FrameMap::r27_oop_opr; ++LIR_Opr FrameMap::r28_oop_opr; ++LIR_Opr FrameMap::r29_oop_opr; ++LIR_Opr FrameMap::r30_oop_opr; ++LIR_Opr FrameMap::r31_oop_opr; ++ ++LIR_Opr FrameMap::t0_opr; ++LIR_Opr FrameMap::t1_opr; ++LIR_Opr FrameMap::t0_long_opr; ++LIR_Opr FrameMap::t1_long_opr; ++ ++LIR_Opr FrameMap::r10_metadata_opr; ++LIR_Opr FrameMap::r11_metadata_opr; ++LIR_Opr FrameMap::r12_metadata_opr; ++LIR_Opr FrameMap::r13_metadata_opr; ++LIR_Opr FrameMap::r14_metadata_opr; ++LIR_Opr FrameMap::r15_metadata_opr; ++ ++LIR_Opr FrameMap::long10_opr; ++LIR_Opr FrameMap::long11_opr; ++LIR_Opr FrameMap::fpu10_float_opr; ++LIR_Opr FrameMap::fpu10_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; ++ ++//-------------------------------------------------------- ++// FrameMap ++//-------------------------------------------------------- ++// |---f31--| ++// |---..---| ++// |---f28--| ++// |---f27--|<---pd_last_callee_saved_fpu_reg_2 ++// |---..---| ++// |---f18--|<---pd_first_callee_saved_fpu_reg_2 ++// |---f17--| ++// |---..---| ++// |---f10--| ++// |---f9---|<---pd_last_callee_saved_fpu_reg_1 ++// |---f8---|<---pd_first_callee_saved_fpu_reg_1 ++// |---f7---| ++// |---..---| ++// |---f0---| ++// |---x27--| ++// |---x23--| ++// |---x8---| ++// |---x4---| ++// |---x3---| ++// |---x2---| ++// |---x1---| ++// |---x0---| ++// |---x26--|<---pd_last_callee_saved_reg ++// |---..---| ++// |---x18--| ++// |---x9---|<---pd_first_callee_saved_reg ++// |---x31--| ++// |---..---| ++// |---x28--| ++// |---x17--| ++// |---..---| ++// |---x10--| ++// |---x7---| ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ ++ int i = 0; ++ ++ // caller save register ++ map_register(i, x7); r7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x10); r10_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x11); r11_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x12); r12_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x13); r13_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x14); r14_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x15); r15_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x16); r16_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x17); r17_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x28); r28_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x29); r29_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x30); r30_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x31); r31_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // callee save register ++ map_register(i, x9); r9_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x18); r18_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x19); r19_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x20); r20_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x21); r21_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x22); r22_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x24); r24_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, x25); r25_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, 
x26); r26_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ // special register ++ map_register(i, x0); zr_opr = LIR_OprFact::single_cpu(i); i++; // zr ++ map_register(i, x1); r1_opr = LIR_OprFact::single_cpu(i); i++; // ra ++ map_register(i, x2); r2_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ map_register(i, x3); r3_opr = LIR_OprFact::single_cpu(i); i++; // gp ++ map_register(i, x4); r4_opr = LIR_OprFact::single_cpu(i); i++; // thread ++ map_register(i, x8); r8_opr = LIR_OprFact::single_cpu(i); i++; // fp ++ map_register(i, x23); r23_opr = LIR_OprFact::single_cpu(i); i++; // java thread ++ map_register(i, x27); r27_opr = LIR_OprFact::single_cpu(i); i++; // heapbase ++ ++ // tmp register ++ map_register(i, x5); r5_opr = LIR_OprFact::single_cpu(i); i++; // t0 ++ map_register(i, x6); r6_opr = LIR_OprFact::single_cpu(i); i++; // t1 ++ ++ t0_opr = r5_opr; ++ t1_opr = r6_opr; ++ t0_long_opr = LIR_OprFact::double_cpu(r5_opr->cpu_regnr(), r5_opr->cpu_regnr()); ++ t1_long_opr = LIR_OprFact::double_cpu(r6_opr->cpu_regnr(), r6_opr->cpu_regnr()); ++ ++ long10_opr = LIR_OprFact::double_cpu(r10_opr->cpu_regnr(), r10_opr->cpu_regnr()); ++ long11_opr = LIR_OprFact::double_cpu(r11_opr->cpu_regnr(), r11_opr->cpu_regnr()); ++ ++ fpu10_float_opr = LIR_OprFact::single_fpu(10); ++ fpu10_double_opr = LIR_OprFact::double_fpu(10); ++ ++ i = 0; ++ _caller_save_cpu_regs[i++] = r7_opr; ++ _caller_save_cpu_regs[i++] = r10_opr; ++ _caller_save_cpu_regs[i++] = r11_opr; ++ _caller_save_cpu_regs[i++] = r12_opr; ++ _caller_save_cpu_regs[i++] = r13_opr; ++ _caller_save_cpu_regs[i++] = r14_opr; ++ _caller_save_cpu_regs[i++] = r15_opr; ++ _caller_save_cpu_regs[i++] = r16_opr; ++ _caller_save_cpu_regs[i++] = r17_opr; ++ _caller_save_cpu_regs[i++] = r28_opr; ++ _caller_save_cpu_regs[i++] = r29_opr; ++ _caller_save_cpu_regs[i++] = r30_opr; ++ _caller_save_cpu_regs[i++] = r31_opr; ++ ++ _init_done = true; ++ ++ zr_oop_opr = as_oop_opr(x0); ++ r1_oop_opr = as_oop_opr(x1); ++ r2_oop_opr = as_oop_opr(x2); ++ r3_oop_opr = as_oop_opr(x3); ++ r4_oop_opr = as_oop_opr(x4); ++ r5_oop_opr = as_oop_opr(x5); ++ r6_oop_opr = as_oop_opr(x6); ++ r7_oop_opr = as_oop_opr(x7); ++ r8_oop_opr = as_oop_opr(x8); ++ r9_oop_opr = as_oop_opr(x9); ++ r10_oop_opr = as_oop_opr(x10); ++ r11_oop_opr = as_oop_opr(x11); ++ r12_oop_opr = as_oop_opr(x12); ++ r13_oop_opr = as_oop_opr(x13); ++ r14_oop_opr = as_oop_opr(x14); ++ r15_oop_opr = as_oop_opr(x15); ++ r16_oop_opr = as_oop_opr(x16); ++ r17_oop_opr = as_oop_opr(x17); ++ r18_oop_opr = as_oop_opr(x18); ++ r19_oop_opr = as_oop_opr(x19); ++ r20_oop_opr = as_oop_opr(x20); ++ r21_oop_opr = as_oop_opr(x21); ++ r22_oop_opr = as_oop_opr(x22); ++ r23_oop_opr = as_oop_opr(x23); ++ r24_oop_opr = as_oop_opr(x24); ++ r25_oop_opr = as_oop_opr(x25); ++ r26_oop_opr = as_oop_opr(x26); ++ r27_oop_opr = as_oop_opr(x27); ++ r28_oop_opr = as_oop_opr(x28); ++ r29_oop_opr = as_oop_opr(x29); ++ r30_oop_opr = as_oop_opr(x30); ++ r31_oop_opr = as_oop_opr(x31); ++ ++ r10_metadata_opr = as_metadata_opr(x10); ++ r11_metadata_opr = as_metadata_opr(x11); ++ r12_metadata_opr = as_metadata_opr(x12); ++ r13_metadata_opr = as_metadata_opr(x13); ++ r14_metadata_opr = as_metadata_opr(x14); ++ r15_metadata_opr = as_metadata_opr(x15); ++ ++ sp_opr = as_pointer_opr(sp); ++ fp_opr = as_pointer_opr(fp); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (i = 0; i < nof_caller_save_fpu_regs; i++) { ++ 
_caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ return Address(sp, in_bytes(sp_offset)); ++} ++ ++ ++// ----------------mapping----------------------- ++// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | <- FP ++// | .........| ++ ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) ++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++ ++VMReg FrameMap::fpu_regname (int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. ++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() ++{ ++ return FrameMap::sp_opr; ++} ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on riscv ++} ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp +new file mode 100644 +index 000000000..f600c2f6f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_FrameMap_riscv.hpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_FRAMEMAP_RISCV_HPP ++#define CPU_RISCV_C1_FRAMEMAP_RISCV_HPP ++ ++// On RISCV the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr zr_opr; ++ static LIR_Opr r1_opr; ++ static LIR_Opr r2_opr; ++ static LIR_Opr r3_opr; ++ static LIR_Opr r4_opr; ++ static LIR_Opr r5_opr; ++ static LIR_Opr r6_opr; ++ static LIR_Opr r7_opr; ++ static LIR_Opr r8_opr; ++ static LIR_Opr r9_opr; ++ static LIR_Opr r10_opr; ++ static LIR_Opr r11_opr; ++ static LIR_Opr r12_opr; ++ static LIR_Opr r13_opr; ++ static LIR_Opr r14_opr; ++ static LIR_Opr r15_opr; ++ static LIR_Opr r16_opr; ++ static LIR_Opr r17_opr; ++ static LIR_Opr r18_opr; ++ static LIR_Opr r19_opr; ++ static LIR_Opr r20_opr; ++ static LIR_Opr r21_opr; ++ static LIR_Opr r22_opr; ++ static LIR_Opr r23_opr; ++ static LIR_Opr r24_opr; ++ static LIR_Opr r25_opr; ++ static LIR_Opr r26_opr; ++ static LIR_Opr r27_opr; ++ static LIR_Opr r28_opr; ++ static LIR_Opr r29_opr; ++ static LIR_Opr r30_opr; ++ static LIR_Opr r31_opr; ++ static LIR_Opr fp_opr; ++ static LIR_Opr sp_opr; ++ ++ static LIR_Opr zr_oop_opr; ++ static LIR_Opr r1_oop_opr; ++ static LIR_Opr r2_oop_opr; ++ static LIR_Opr r3_oop_opr; ++ static LIR_Opr r4_oop_opr; ++ static LIR_Opr r5_oop_opr; ++ static LIR_Opr r6_oop_opr; ++ static LIR_Opr r7_oop_opr; ++ static LIR_Opr r8_oop_opr; ++ static LIR_Opr r9_oop_opr; ++ static LIR_Opr r10_oop_opr; ++ static LIR_Opr r11_oop_opr; ++ static LIR_Opr r12_oop_opr; ++ static LIR_Opr r13_oop_opr; ++ static LIR_Opr r14_oop_opr; ++ static LIR_Opr r15_oop_opr; ++ static LIR_Opr r16_oop_opr; ++ static LIR_Opr r17_oop_opr; ++ static LIR_Opr r18_oop_opr; ++ static LIR_Opr r19_oop_opr; ++ static LIR_Opr r20_oop_opr; ++ static LIR_Opr r21_oop_opr; ++ static LIR_Opr r22_oop_opr; ++ static LIR_Opr r23_oop_opr; ++ static LIR_Opr r24_oop_opr; ++ static LIR_Opr r25_oop_opr; ++ static LIR_Opr r26_oop_opr; ++ static LIR_Opr r27_oop_opr; ++ static LIR_Opr r28_oop_opr; ++ static LIR_Opr r29_oop_opr; ++ static LIR_Opr r30_oop_opr; ++ static LIR_Opr r31_oop_opr; ++ ++ static LIR_Opr t0_opr; ++ static LIR_Opr t1_opr; ++ static LIR_Opr t0_long_opr; ++ static LIR_Opr t1_long_opr; ++ ++ static LIR_Opr r10_metadata_opr; ++ static LIR_Opr r11_metadata_opr; ++ static LIR_Opr r12_metadata_opr; ++ static LIR_Opr r13_metadata_opr; ++ static LIR_Opr r14_metadata_opr; ++ static LIR_Opr r15_metadata_opr; ++ ++ static LIR_Opr long10_opr; ++ static LIR_Opr long11_opr; ++ static LIR_Opr fpu10_float_opr; ++ static LIR_Opr fpu10_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname(int n); ++ ++ static bool is_caller_save_register(LIR_Opr opr) { return true; } ++ static bool is_caller_save_register(Register r) { return true; } ++ ++ 
static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ ++#endif // CPU_RISCV_C1_FRAMEMAP_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +new file mode 100644 +index 000000000..a846d60ae +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.cpp +@@ -0,0 +1,287 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++#define __ _masm-> ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { ++ ++ // opcode check ++ assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); ++ bool is_irem = (code == lir_irem); ++ ++ // operand check ++ assert(left->is_single_cpu(), "left must be register"); ++ assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); ++ assert(result->is_single_cpu(), "result must be register"); ++ Register lreg = left->as_register(); ++ Register dreg = result->as_register(); ++ ++ // power-of-2 constant check and codegen ++ if (right->is_constant()) { ++ int c = right->as_constant_ptr()->as_jint(); ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (is_irem) { ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ mv(dreg, zr); ++ } else { ++ unsigned int shift = exact_log2(c); ++ __ sraiw(t0, lreg, 0x1f); ++ __ srliw(t0, t0, BitsPerInt - shift); ++ __ addw(t1, lreg, t0); ++ if (is_imm_in_range(c - 1, 12, 0)) { ++ __ andi(t1, t1, c - 1); ++ } else { ++ __ zero_extend(t1, t1, shift); ++ } ++ __ subw(dreg, t1, t0); ++ } ++ } else { ++ if (c == 1) { ++ // move lreg to dreg if divisor is 1 ++ __ mv(dreg, lreg); ++ } else { ++ unsigned int shift = exact_log2(c); ++ __ sraiw(t0, lreg, 0x1f); ++ if (is_imm_in_range(c - 1, 12, 0)) { ++ __ andi(t0, t0, c - 1); ++ } else { ++ __ zero_extend(t0, t0, shift); ++ } ++ __ addw(dreg, t0, lreg); ++ __ sraiw(dreg, dreg, shift); ++ } ++ } ++ } else 
{ ++ Register rreg = right->as_register(); ++ __ corrected_idivl(dreg, lreg, rreg, is_irem); ++ } ++} ++ ++void LIR_Assembler::arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, ++ Register lreg, Register dreg) { ++ // cpu register - constant ++ jlong c; ++ ++ switch (right->type()) { ++ case T_LONG: ++ c = right->as_constant_ptr()->as_jlong(); break; ++ case T_INT: // fall through ++ case T_ADDRESS: ++ c = right->as_constant_ptr()->as_jint(); break; ++ default: ++ ShouldNotReachHere(); ++ c = 0; // unreachable ++ } ++ ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ if (c == 0 && dreg == lreg) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ switch (left->type()) { ++ case T_INT: ++ switch (code) { ++ case lir_add: __ addw(dreg, lreg, c); break; ++ case lir_sub: __ subw(dreg, lreg, c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ case T_OBJECT: // fall through ++ case T_ADDRESS: ++ switch (code) { ++ case lir_add: __ add(dreg, lreg, c); break; ++ case lir_sub: __ sub(dreg, lreg, c); break; ++ default: ShouldNotReachHere(); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ addw(dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ subw(dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mulw(dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // sigle_cpu + double_cpu; can happen with obj_long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add(dreg, lreg, rreg); break; ++ case lir_sub: __ sub(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ arith_op_single_cpu_right_constant(code, left, right, lreg, dreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { ++ Register lreg_lo = left->as_register_lo(); ++ ++ if (right->is_double_cpu()) { ++ // cpu register - cpu register ++ Register rreg_lo = right->as_register_lo(); ++ switch (code) { ++ case lir_add: __ add(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_sub: __ sub(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_mul: __ mul(dest->as_register_lo(), lreg_lo, rreg_lo); break; ++ case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false); break; ++ case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ jlong c = right->as_constant_ptr()->as_jlong(); ++ Register dreg = as_reg(dest); ++ switch (code) { ++ case lir_add: ++ case lir_sub: ++ if (c == 0 && dreg == lreg_lo) { ++ COMMENT("effective nop elided"); ++ return; ++ } ++ code == lir_add ? 
__ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c); ++ break; ++ case lir_div: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move lreg_lo to dreg if divisor is 1 ++ __ mv(dreg, lreg_lo); ++ } else { ++ unsigned int shift = exact_log2(c); ++ // use t0 as intermediate result register ++ __ srai(t0, lreg_lo, 0x3f); ++ if (is_imm_in_range(c - 1, 12, 0)) { ++ __ andi(t0, t0, c - 1); ++ } else { ++ __ zero_extend(t0, t0, shift); ++ } ++ __ add(dreg, t0, lreg_lo); ++ __ srai(dreg, dreg, shift); ++ } ++ break; ++ case lir_rem: ++ assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); ++ if (c == 1) { ++ // move 0 to dreg if divisor is 1 ++ __ mv(dreg, zr); ++ } else { ++ unsigned int shift = exact_log2(c); ++ __ srai(t0, lreg_lo, 0x3f); ++ __ srli(t0, t0, BitsPerLong - shift); ++ __ add(t1, lreg_lo, t0); ++ if (is_imm_in_range(c - 1, 12, 0)) { ++ __ andi(t1, t1, c - 1); ++ } else { ++ __ zero_extend(t1, t1, shift); ++ } ++ __ sub(dreg, t1, t0); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { ++ assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); ++ switch (code) { ++ case lir_add: __ fadd_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_sub: __ fsub_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_s(dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) { ++ if (right->is_double_fpu()) { ++ // fpu register - fpu register ++ switch (code) { ++ case lir_add: __ fadd_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_sub: __ fsub_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_mul_strictfp: // fall through ++ case lir_mul: __ fmul_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ case lir_div_strictfp: // fall through ++ case lir_div: __ fdiv_d(dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, ++ CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ arith_op_single_cpu(code, left, right, dest); ++ } else if (left->is_double_cpu()) { ++ arith_op_double_cpu(code, left, right, dest); ++ } else if (left->is_single_fpu()) { ++ arith_op_single_fpu(code, left, right, dest); ++ } else if (left->is_double_fpu()) { ++ arith_op_double_fpu(code, left, right, dest); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp +new file mode 100644 +index 000000000..93530ef58 +--- /dev/null ++++ 
b/src/hotspot/cpu/riscv/c1_LIRAssembler_arith_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP ++#define CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP ++ ++ // arith_op sub functions ++ void arith_op_single_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); ++ void arith_op_double_cpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); ++ void arith_op_single_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); ++ void arith_op_double_fpu(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); ++ void arith_op_single_cpu_right_constant(LIR_Code code, LIR_Opr left, LIR_Opr right, Register lreg, Register dreg); ++ void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++#endif // CPU_RISCV_C1_LIRASSEMBLER_ARITH_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +new file mode 100644 +index 000000000..31f8d6a4a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.cpp +@@ -0,0 +1,387 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "oops/objArrayKlass.hpp" ++ ++#define __ _masm-> ++ ++ ++void LIR_Assembler::generic_arraycopy(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, CodeStub *stub) { ++ assert(src == x11 && src_pos == x12, "mismatch in calling convention"); ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ arraycopy_store_args(src, src_pos, length, dst, dst_pos); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ mv(c_rarg0, j_rarg0); ++ assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ mv(c_rarg1, j_rarg1); ++ assert_different_registers(c_rarg2, j_rarg3, j_rarg4); ++ __ mv(c_rarg2, j_rarg2); ++ assert_different_registers(c_rarg3, j_rarg4); ++ __ mv(c_rarg3, j_rarg3); ++ __ mv(c_rarg4, j_rarg4); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ } ++#endif ++ __ far_call(RuntimeAddress(copyfunc_addr)); ++ __ beqz(x10, *stub->continuation()); ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ arraycopy_load_args(src, src_pos, length, dst, dst_pos); ++ ++ // x10 is -1^K where K == partial copied count ++ __ xori(t0, x10, -1); ++ // adjust length down and src/end pos up by partial copied count ++ __ subw(length, length, t0); ++ __ addw(src_pos, src_pos, t0); ++ __ addw(dst_pos, dst_pos, t0); ++ __ j(*stub->entry()); ++ ++ __ bind(*stub->continuation()); ++} ++ ++void LIR_Assembler::arraycopy_simple_check(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, int flags) { ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ beqz(src, *stub->entry(), /* is_far */ true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ beqz(dst, *stub->entry(), /* is_far */ true); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
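++  //
++  // (Reading aid, based on the shared Klass layout-helper encoding: layout_helper()
++  // is negative for array klasses and >= Klass::_lh_neutral_value (0) for instance
++  // klasses, so the "bge(t0, t1, *stub->entry())" checks below send any operand
++  // that is not an array to the slow stub.)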
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ mv(t1, Klass::_lh_neutral_value); ++ __ bge(t0, t1, *stub->entry(), /* is_far */ true); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ lw(t0, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ mv(t1, Klass::_lh_neutral_value); ++ __ bge(t0, t1, *stub->entry(), /* is_far */ true); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ bltz(src_pos, *stub->entry(), /* is_far */ true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ bltz(dst_pos, *stub->entry(), /* is_far */ true); ++ } ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ bltz(length, *stub->entry(), /* is_far */ true); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ addw(tmp, src_pos, length); ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ addw(tmp, dst_pos, length); ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ bgtu(tmp, t0, *stub->entry(), /* is_far */ true); ++ } ++} ++ ++void LIR_Assembler::arraycopy_checkcast(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, BasicType basic_type, ++ address copyfunc_addr, int flags) { ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ int mask = LIR_OpArrayCopy::src_objarray | LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. ++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(t0, klass_lh_addr); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, *stub->entry(), /* is_far */ true); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. 
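++  // (The five spill slots written here are the same ones read back by
++  // arraycopy_load_args() at the end of this file: dst_pos at sp + 0, then
++  // dst, length, src_pos and src at successive BytesPerWord offsets.)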
++ arraycopy_store_args(src, src_pos, length, dst, dst_pos); ++ arraycopy_checkcast_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); ++ __ far_call(RuntimeAddress(copyfunc_addr)); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ bnez(x10, failed); ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ bind(failed); ++ } ++#endif ++ ++ __ beqz(x10, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, x10, t0); ++ ++ // Restore previously spilled arguments ++ arraycopy_load_args(src, src_pos, length, dst, dst_pos); ++ ++ // return value is -1^K where K is partial copied count ++ __ xori(t0, x10, -1); ++ // adjust length down and src/end pos up by partial copied count ++ __ subw(length, length, t0); ++ __ addw(src_pos, src_pos, t0); ++ __ addw(dst_pos, dst_pos, t0); ++} ++ ++void LIR_Assembler::arraycopy_type_check(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, BasicType basic_type, int flags) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ lwu(tmp, Address(src, oopDesc::klass_offset_in_bytes())); ++ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ __ ld(tmp, Address(src, oopDesc::klass_offset_in_bytes())); ++ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++ __ bne(tmp, t0, *stub->entry(), /* is_far */ true); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++#define PUSH(r1, r2) \ ++ __ addi(sp, sp, -2 * wordSize); \ ++ __ sd(r1, Address(sp, 1 * wordSize)); \ ++ __ sd(r2, Address(sp, 0)); ++ ++#define POP(r1, r2) \ ++ __ ld(r1, Address(sp, 1 * wordSize)); \ ++ __ ld(r2, Address(sp, 0)); \ ++ __ addi(sp, sp, 2 * wordSize); ++ ++ PUSH(src, dst); ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ PUSH(src, dst); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ POP(src, dst); ++ __ bnez(dst, cont); ++ ++ __ bind(slow); ++ POP(src, dst); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ arraycopy_checkcast(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, copyfunc_addr, flags); ++ } ++ ++ __ j(*stub->entry()); ++ __ bind(cont); ++ POP(src, dst); ++ } ++} ++ ++void LIR_Assembler::arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags) { ++ assert(default_type != NULL, "NULL default_type!"); ++ BasicType basic_type = default_type->element_type()->basic_type(); ++ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. 
For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. ++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ if (UseCompressedClassPointers) { ++ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++ __ bne(tmp, t0, halt); ++ if (UseCompressedClassPointers) { ++ __ lwu(t0, Address(src, oopDesc::klass_offset_in_bytes())); ++ } else { ++ __ ld(t0, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++ __ beq(tmp, t0, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ lwu(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ __ ld(t0, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++ __ beq(tmp, t0, known_ok); ++ __ beq(src, dst, known_ok); ++ } ++ __ bind(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ bind(known_ok); ++ } ++} ++ ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ ciArrayKlass *default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (basic_type == T_ARRAY) { basic_type = T_OBJECT; } ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL) { ++ generic_arraycopy(src, src_pos, length, dst, dst_pos, stub); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), ++ "must be true at this point"); ++ ++ arraycopy_simple_check(src, src_pos, length, dst, dst_pos, tmp, stub, flags); ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ arraycopy_type_check(src, src_pos, length, dst, dst_pos, tmp, stub, basic_type, flags); ++ } ++ ++#ifdef ASSERT ++ arraycopy_assert(src, dst, tmp, default_type, flags); ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ } ++#endif ++ arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name = NULL; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ __ far_call(RuntimeAddress(entry)); ++ } else { ++ const int args_num = 3; ++ __ call_VM_leaf(entry, args_num); ++ } ++ ++ __ bind(*stub->continuation()); ++} ++ ++ ++void LIR_Assembler::arraycopy_prepare_params(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, BasicType basic_type) { ++ int scale = array_element_size(basic_type); ++ __ shadd(c_rarg0, src_pos, src, t0, scale); ++ __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ 
assert_different_registers(c_rarg0, dst, dst_pos, length); ++ __ shadd(c_rarg1, dst_pos, dst, t0, scale); ++ __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(c_rarg1, dst, length); ++ __ mv(c_rarg2, length); ++ assert_different_registers(c_rarg2, dst); ++} ++ ++void LIR_Assembler::arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, BasicType basic_type) { ++ arraycopy_prepare_params(src, src_pos, length, dst, dst_pos, basic_type); ++ __ load_klass(c_rarg4, dst); ++ __ ld(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); ++ __ lwu(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); ++} ++ ++void LIR_Assembler::arraycopy_store_args(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos) { ++ __ sd(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset ++ __ sd(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset ++ __ sd(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset ++ __ sd(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset ++ __ sd(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset ++} ++ ++void LIR_Assembler::arraycopy_load_args(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos) { ++ __ ld(dst_pos, Address(sp, 0)); // 0: dst_pos sp offset ++ __ ld(dst, Address(sp, 1 * BytesPerWord)); // 1: dst sp offset ++ __ ld(length, Address(sp, 2 * BytesPerWord)); // 2: length sp offset ++ __ ld(src_pos, Address(sp, 3 * BytesPerWord)); // 3: src_pos sp offset ++ __ ld(src, Address(sp, 4 * BytesPerWord)); // 4: src sp offset ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp +new file mode 100644 +index 000000000..872fd2ef6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_arraycopy_riscv.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++#define CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP ++ // arraycopy sub functions ++ void generic_arraycopy(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, CodeStub *stub); ++ void arraycopy_simple_check(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, int flags); ++ void arraycopy_checkcast(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, BasicType basic_type, ++ address copyfunc_addr, int flags); ++ void arraycopy_type_check(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, Register tmp, ++ CodeStub *stub, BasicType basic_type, int flags); ++ void arraycopy_assert(Register src, Register dst, Register tmp, ciArrayKlass *default_type, int flags); ++ void arraycopy_prepare_params(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, BasicType basic_type); ++ void arraycopy_checkcast_prepare_params(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos, BasicType basic_type); ++ void arraycopy_store_args(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos); ++ void arraycopy_load_args(Register src, Register src_pos, Register length, ++ Register dst, Register dst_pos); ++#endif // CPU_RISCV_C1_LIRASSEMBLER_ARRAYCOPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +new file mode 100644 +index 000000000..222e3e97e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +@@ -0,0 +1,2275 @@ ++/* ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArrayKlass.hpp" ++#include "ci/ciInstance.hpp" ++#include "code/compiledIC.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++NEEDS_CLEANUP // remove this definitions ? ++const Register IC_Klass = t1; // where the IC klass is cached ++const Register SYNC_header = x10; // synchronization header ++const Register SHIFT_count = x10; // where count for shift operations must be ++ ++#define __ _masm-> ++ ++static void select_different_registers(Register preserve, ++ Register extra, ++ Register &tmp1, ++ Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++static void select_different_registers(Register preserve, ++ Register extra, ++ Register &tmp1, ++ Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++//------------------------------------------- ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ return as_Address(addr, t0); ++} ++ ++Address 
LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr); ++} ++ ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. ++Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.getMode() == Address::base_plus_offset); ++ precond(addr.base() == sp); ++ precond(addr.offset() > 0); ++ uint mask = size - 1; ++ assert((addr.offset() & mask) == 0, "scaled offsets only"); ++ ++ return addr; ++} ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ guarantee(osr_entry != NULL, "NULL osr_entry!"); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // x12: osr buffer ++ // ++ ++ //build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // x12: pointer to osr buffer ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. ++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { ++ assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + ++ (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. ++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ld(t0, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ bnez(t0, L); ++ __ stop("locked object is NULL"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ ld(x9, Address(OSR_buf, slot_offset + 0)); ++ __ sd(x9, frame_map()->address_for_monitor_lock(i)); ++ __ ld(x9, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); ++ __ sd(x9, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++// inline cache check; done before the frame is built. 
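++// (Overview of the check emitted below: the receiver klass is compared against
++// the cached klass in IC_Klass (t1); on a mismatch we jump to the shared IC-miss
++// stub with the receiver still live in its register. The "4 * 4" threshold is,
++// presumably, four 32-bit instructions for a trivial accessor; anything larger
++// gets its verified entry point aligned to CodeEntryAlignment.)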
++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ Label dont; ++ __ inline_cache_check(receiver, ic_klass, dont); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ bind(dont); ++ return start_offset; ++} ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ mv(reg, zr); ++ } else { ++ __ movoop(reg, o, /* immediate */ true); ++ } ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! ++ ++ // The frame_map records size in slots (32bit word) ++ ++ // subtract two words to account for return address and link ++ return (frame_map()->framesize() - (2 * VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++} ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci ==> add a nop ++ // (was bug 5/14/1999 -gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in x10, and x13 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, false, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(x10); ++ ++ // search an exception handler (x10: exception oop, x13: throwing pc) ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); ++ __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++// Emit the code to remove the frame from the stack in the exception ++// unwind path. 
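++// (In outline: fetch the pending exception oop from thread-local state and clear
++// the thread's exception fields, release the monitor for synchronized methods,
++// post the DTrace method-exit probe if enabled, then remove the activation and
++// jump to Runtime1::unwind_exception_id.)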
++int LIR_Assembler::emit_unwind_handler() {
++#ifndef PRODUCT
++  if (CommentedAssembly) {
++    _masm->block_comment("Unwind handler");
++  }
++#endif // PRODUCT
++
++  int offset = code_offset();
++
++  // Fetch the exception from TLS and clear out exception related thread state
++  __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
++  __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
++  __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
++
++  __ bind(_unwind_handler_entry);
++  __ verify_not_null_oop(x10);
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ mv(x9, x10);  // Preserve the exception
++  }
++
++  // Perform needed unlocking
++  MonitorExitStub* stub = NULL;
++  if (method()->is_synchronized()) {
++    monitor_address(0, FrameMap::r10_opr);
++    stub = new MonitorExitStub(FrameMap::r10_opr, true, 0);
++    __ unlock_object(x15, x14, x10, *stub->entry());
++    __ bind(*stub->continuation());
++  }
++
++  if (compilation()->env()->dtrace_method_probes()) {
++    __ mv(c_rarg0, xthread);
++    __ mov_metadata(c_rarg1, method()->constant_encoding());
++    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), c_rarg0, c_rarg1);
++  }
++
++  if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) {
++    __ mv(x10, x9);  // Restore the exception
++  }
++
++  // remove the activation and dispatch to the unwind handler
++  __ block_comment("remove_frame and dispatch to the unwind handler");
++  __ remove_frame(initial_frame_size_in_bytes());
++  __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id)));
++
++  // Emit the slow path assembly
++  if (stub != NULL) {
++    stub->emit_code(this);
++  }
++
++  return offset;
++}
++
++int LIR_Assembler::emit_deopt_handler() {
++  // if the last instruction is a call (typically to do a throw which
++  // is coming at the end after block reordering) the return address
++  // must still point into the code area in order to avoid assertion
++  // failures when searching for the corresponding bci => add a nop
++  // (was bug 5/14/1999 - gri)
++  __ nop();
++
++  // generate code for exception handler
++  address handler_base = __ start_a_stub(deopt_handler_size());
++  if (handler_base == NULL) {
++    // not enough space left for the handler
++    bailout("deopt handler overflow");
++    return -1;
++  }
++
++  int offset = code_offset();
++
++  __ auipc(ra, 0);
++  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
++  guarantee(code_offset() - offset <= deopt_handler_size(), "overflow");
++  __ end_a_stub();
++
++  return offset;
++}
++
++void LIR_Assembler::return_op(LIR_Opr result) {
++  assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == x10, "word returns are in x10");
++
++  // Pop the stack before the safepoint code
++  __ remove_frame(initial_frame_size_in_bytes());
++
++  if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) {
++    __ reserved_stack_check();
++  }
++
++  address polling_page(os::get_polling_page());
++  __ read_polling_page(t0, polling_page, relocInfo::poll_return_type);
++  __ ret();
++}
++
++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
++  address polling_page(os::get_polling_page());
++  guarantee(info != NULL, "Shouldn't be NULL");
++  assert(os::is_poll_address(polling_page), "should be");
++  int32_t offset = 0;
++  __ get_polling_page(t0, polling_page, offset, relocInfo::poll_type);
++  add_debug_info_for_branch(info); // This isn't just debug info:
++  //
it's the oop map ++ __ read_polling_page(t0, offset, relocInfo::poll_type); ++ return __ offset(); ++} ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++ __ mv(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ address const_addr = NULL; ++ ++ switch (c->type()) { ++ case T_INT: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ mvw(dest->as_register(), c->as_jint()); ++ break; ++ ++ case T_ADDRESS: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ mv(dest->as_register(), c->as_jint()); ++ break; ++ ++ case T_LONG: ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ mv(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ ++ case T_OBJECT: ++ case T_ARRAY: ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ ++ case T_METADATA: ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ ++ case T_FLOAT: ++ const_addr = float_constant(c->as_jfloat()); ++ assert(const_addr != NULL, "must create float constant in the constant table"); ++ __ flw(dest->as_float_reg(), InternalAddress(const_addr)); ++ break; ++ ++ case T_DOUBLE: ++ const_addr = double_constant(c->as_jdouble()); ++ assert(const_addr != NULL, "must create double constant in the constant table"); ++ __ fld(dest->as_double_reg(), InternalAddress(const_addr)); ++ break; ++ ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_stack(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ if (c->as_jobject() == NULL) { ++ __ sd(zr, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } else { ++ const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::t1_opr, dest, c->type(), false); ++ } ++ break; ++ case T_ADDRESS: // fall through ++ const2reg(src, FrameMap::t1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::t1_opr, dest, c->type(), false); ++ case T_INT: // fall through ++ case T_FLOAT: ++ if (c->as_jint_bits() == 0) { ++ __ sw(zr, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } else { ++ __ mvw(t1, c->as_jint_bits()); ++ __ sw(t1, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ break; ++ case T_LONG: // fall through ++ case T_DOUBLE: ++ if (c->as_jlong_bits() == 0) { ++ __ sd(zr, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } else { ++ __ mv(t1, (intptr_t)c->as_jlong_bits()); ++ __ sd(t1, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_address(), "should not call otherwise"); ++ 
LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ void (Assembler::* insn)(Register Rt, const Address &adr, Register temp); ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::sd; break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::sd; break; ++ case T_DOUBLE: ++ assert(c->as_jdouble() == 0.0, "should be"); ++ insn = &Assembler::sd; break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::sw; break; ++ case T_FLOAT: ++ assert(c->as_jfloat() == 0.0f, "should be"); ++ insn = &Assembler::sw; break; ++ case T_OBJECT: // fall through ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::sw; ++ } else { ++ insn = &Assembler::sd; ++ } ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::sh; ++ break; ++ case T_BOOLEAN: // fall through ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::sb; break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::sd; // unreachable ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ (_masm->*insn)(zr, as_Address(to_addr), t0); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ } else if (dest->is_double_cpu()) { ++ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ } else if (dest->is_single_fpu()) { ++ assert(src->is_single_fpu(), "expect single fpu"); ++ __ fmv_s(dest->as_float_reg(), src->as_float_reg()); ++ } else if (dest->is_double_fpu()) { ++ assert(src->is_double_fpu(), "expect double fpu"); ++ __ fmv_d(dest->as_double_reg(), src->as_double_reg()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_stack(), "should not call otherwise"); ++ if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); ++ if (is_reference_type(type)) { ++ __ sd(src->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ sd(src->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ sw(src->as_register(), 
stack_slot_address(index, c_sz32)); ++ } ++ } else if (src->is_double_cpu()) { ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ sd(src->as_register_lo(), dest_addr_LO); ++ } else if (src->is_single_fpu()) { ++ int index = dest->single_stack_ix(); ++ __ fsw(src->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (src->is_double_fpu()) { ++ int index = dest->double_stack_ix(); ++ __ fsd(src->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, ++ bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ // t0 was used as tmp reg in as_Address, so we use t1 as compressed_src ++ Register compressed_src = t1; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (type == T_ARRAY || type == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ ++ switch (type) { ++ case T_FLOAT: ++ __ fsw(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ ++ case T_DOUBLE: ++ __ fsd(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ ++ case T_ARRAY: // fall through ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ __ sw(compressed_src, as_Address(to_addr)); ++ } else { ++ __ sd(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be compressed klass ++ // ptr or a 64 bit method pointer. 
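++      // (On this port that situation should not arise: the DTrace probes emitted
++      // in emit_unwind_handler() above pass the Method* directly in c_rarg1 via
++      // mov_metadata(), so this case is rejected with ShouldNotReachHere().)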
++ ShouldNotReachHere(); ++ __ sd(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ sd(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ sw(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_LONG: ++ __ sd(src->as_register_lo(), as_Address(to_addr)); ++ break; ++ case T_BYTE: // fall through ++ case T_BOOLEAN: ++ __ sb(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ sh(src->as_register(), as_Address(to_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ ++ if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); ++ if (type == T_INT) { ++ __ lw(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } else if (is_reference_type(type)) { ++ __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ld(dest->as_register(), stack_slot_address(index, c_sz64)); ++ } else { ++ __ lwu(dest->as_register(), stack_slot_address(index, c_sz32)); ++ } ++ } else if (dest->is_double_cpu()) { ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); ++ __ ld(dest->as_register_lo(), src_addr_LO); ++ } else if (dest->is_single_fpu()) { ++ int index = src->single_stack_ix(); ++ __ flw(dest->as_float_reg(), stack_slot_address(index, c_sz32)); ++ } else if (dest->is_double_fpu()) { ++ int index = src->double_stack_ix(); ++ __ fld(dest->as_double_reg(), stack_slot_address(index, c_sz64)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ deoptimize_trap(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ LIR_Opr temp; ++ if (type == T_LONG || type == T_DOUBLE) { ++ temp = FrameMap::t1_long_opr; ++ } else { ++ temp = FrameMap::t1_opr; ++ } ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, ++ bool wide, bool /* unaligned */) { ++ assert(src->is_address(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ LIR_Address* addr = src->as_address_ptr(); ++ LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: ++ __ flw(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ case T_DOUBLE: ++ __ fld(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ case T_ARRAY: // fall through ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer 
to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ lwu(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ld(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ lw(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_LONG: ++ __ ld(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ case T_BYTE: ++ __ lb(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: ++ __ lbu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_CHAR: ++ __ lhu(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ lh(dest->as_register(), as_Address(from_addr)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (type == T_ARRAY || type == T_OBJECT) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ case lir_irem: ++ arithmetic_idiv(op->code(), ++ op->in_opr1(), ++ op->in_opr2(), ++ op->in_opr3(), ++ op->result_opr(), ++ op->info()); ++ break; ++ case lir_fmad: ++ __ fmadd_d(op->result_opr()->as_double_reg(), ++ op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), ++ op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadd_s(op->result_opr()->as_float_reg(), ++ op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), ++ op->in_opr3()->as_float_reg()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ Label label; ++ ++ emit_branch(condition, cmp_opr1, cmp_opr2, label, /* is_far */ false, ++ /* is_unordered */ (condition == lir_cond_greaterEqual || condition == lir_cond_greater) ? 
false : true); ++ ++ Label done; ++ move_op(opr2, result, type, lir_patch_none, NULL, ++ false, // pop_fpu_stack ++ false, // unaligned ++ false); // wide ++ __ j(done); ++ __ bind(label); ++ move_op(opr1, result, type, lir_patch_none, NULL, ++ false, // pop_fpu_stack ++ false, // unaligned ++ false); // wide ++ __ bind(done); ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++ LIR_Condition condition = op->cond(); ++ if (condition == lir_cond_always) { ++ if (op->info() != NULL) { ++ add_debug_info_for_branch(op->info()); ++ } ++ } else { ++ assert(op->in_opr1() != LIR_OprFact::illegalOpr && op->in_opr2() != LIR_OprFact::illegalOpr, "conditional branches must have legal operands"); ++ } ++ bool is_unordered = (op->ublock() == op->block()); ++ emit_branch(condition, op->in_opr1(), op->in_opr2(), *op->label(), /* is_far */ true, is_unordered); ++} ++ ++void LIR_Assembler::emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, ++ bool is_far, bool is_unordered) { ++ ++ if (cmp_flag == lir_cond_always) { ++ __ j(label); ++ return; ++ } ++ ++ if (cmp1->is_cpu_register()) { ++ Register reg1 = as_reg(cmp1); ++ if (cmp2->is_cpu_register()) { ++ Register reg2 = as_reg(cmp2); ++ __ c1_cmp_branch(cmp_flag, reg1, reg2, label, cmp1->type(), is_far); ++ } else if (cmp2->is_constant()) { ++ const2reg_helper(cmp2); ++ __ c1_cmp_branch(cmp_flag, reg1, t0, label, cmp2->type(), is_far); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (cmp1->is_single_fpu()) { ++ assert(cmp2->is_single_fpu(), "expect single float register"); ++ __ c1_float_cmp_branch(cmp_flag, cmp1->as_float_reg(), cmp2->as_float_reg(), label, is_far, is_unordered); ++ } else if (cmp1->is_double_fpu()) { ++ assert(cmp2->is_double_fpu(), "expect double float register"); ++ __ c1_float_cmp_branch(cmp_flag | C1_MacroAssembler::c1_double_branch_mask, ++ cmp1->as_double_reg(), cmp2->as_double_reg(), label, is_far, is_unordered); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ __ fcvt_s_w(dest->as_float_reg(), src->as_register()); break; ++ case Bytecodes::_i2d: ++ __ fcvt_d_w(dest->as_double_reg(), src->as_register()); break; ++ case Bytecodes::_l2d: ++ __ fcvt_d_l(dest->as_double_reg(), src->as_register_lo()); break; ++ case Bytecodes::_l2f: ++ __ fcvt_s_l(dest->as_float_reg(), src->as_register_lo()); break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); break; ++ case Bytecodes::_i2c: ++ __ zero_extend(dest->as_register(), src->as_register(), 16); break; ++ case Bytecodes::_i2l: ++ __ addw(dest->as_register_lo(), src->as_register(), zr); break; ++ case Bytecodes::_i2s: ++ __ sign_extend(dest->as_register(), src->as_register(), 16); break; ++ case Bytecodes::_i2b: ++ __ sign_extend(dest->as_register(), src->as_register(), 8); break; ++ case Bytecodes::_l2i: ++ _masm->block_comment("FIXME: This coulde be no-op"); ++ __ addw(dest->as_register(), src->as_register_lo(), zr); break; ++ case Bytecodes::_d2l: ++ __ fcvt_l_d_safe(dest->as_register_lo(), src->as_double_reg()); break; ++ case Bytecodes::_f2i: ++ __ fcvt_w_s_safe(dest->as_register(), src->as_float_reg()); break; ++ case Bytecodes::_f2l: ++ __ fcvt_l_s_safe(dest->as_register_lo(), src->as_float_reg()); break; ++ case Bytecodes::_d2i: ++ 
__ fcvt_w_d_safe(dest->as_register(), src->as_double_reg()); break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ lbu(t0, Address(op->klass()->as_register(), ++ InstanceKlass::init_state_offset())); ++ __ mvw(t1, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ bne(t0, t1, *op->stub()->entry(), /* is_far */ true); ++ } ++ ++ __ allocate_object(op->obj()->as_register(), ++ op->tmp1()->as_register(), ++ op->tmp2()->as_register(), ++ op->header_size(), ++ op->object_size(), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || ++ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ __ j(*op->stub()->entry()); ++ } else { ++ Register tmp1 = op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ mv(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), ++ len, ++ tmp1, ++ tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. 
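++    // (Reading aid: a ReceiverTypeData cell holds row_limit() (receiver klass,
++    // count) pairs. This first loop increments the count of a row that already
++    // matches recv; the loop that follows claims the first empty row for a
++    // receiver klass that has not been recorded yet.)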
++    __ ld(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))));
++    __ bne(recv, t1, next_test);
++    Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)));
++    __ increment(data_addr, DataLayout::counter_increment);
++    __ j(*update_done);
++    __ bind(next_test);
++  }
++
++  // Didn't find receiver; find next empty slot and fill it in
++  for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) {
++    Label next_test;
++    Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)));
++    __ ld(t1, recv_addr);
++    __ bnez(t1, next_test);
++    __ sd(recv, recv_addr);
++    __ mv(t1, DataLayout::counter_increment);
++    __ sd(t1, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))));
++    __ j(*update_done);
++    __ bind(next_test);
++  }
++}
++
++void LIR_Assembler::data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data) {
++  ciMethod* method = op->profiled_method();
++  assert(method != NULL, "Should have method");
++  int bci = op->profiled_bci();
++  *md = method->method_data_or_null();
++  guarantee(*md != NULL, "Sanity");
++  *data = ((*md)->bci_to_data(bci));
++  assert(*data != NULL, "need data for type check");
++  assert((*data)->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
++}
++
++void LIR_Assembler::typecheck_helper_slowcheck(ciKlass *k, Register obj, Register Rtmp1,
++                                               Register k_RInfo, Register klass_RInfo,
++                                               Label *failure_target, Label *success_target) {
++  // get object class
++  // not a safepoint as obj null check happens earlier
++  __ load_klass(klass_RInfo, obj);
++  if (k->is_loaded()) {
++    // See if we get an immediate positive hit
++    __ ld(t0, Address(klass_RInfo, long(k->super_check_offset())));
++    if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) {
++      __ bne(k_RInfo, t0, *failure_target, /* is_far */ true);
++      // successful cast, fall through to profile or jump
++    } else {
++      // See if we get an immediate positive hit
++      __ beq(k_RInfo, t0, *success_target);
++      // check for self
++      __ beq(klass_RInfo, k_RInfo, *success_target);
++
++      __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo
++      __ sd(k_RInfo, Address(sp, 0));             // super klass (k)
++      __ sd(klass_RInfo, Address(sp, wordSize));  // sub klass (object klass)
++      __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
++      // load result to k_RInfo
++      __ ld(k_RInfo, Address(sp, 0));
++      __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo
++      // result is a boolean
++      __ beqz(k_RInfo, *failure_target, /* is_far */ true);
++      // successful cast, fall through to profile or jump
++    }
++  } else {
++    // perform the fast part of the checking logic
++    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL);
++    // call out-of-line instance of __ check_klass_subtype_slow_path(...)
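++    // (Stack protocol used at every slow_subtype_check_id call site in this file:
++    // the superclass goes at sp + 0, the subclass at sp + wordSize, and the stub
++    // leaves its boolean result in the sp + 0 slot, which is reloaded into
++    // k_RInfo below.)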
++ __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo ++ __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass ++ __ sd(k_RInfo, Address(sp, 0)); // super klass ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ // load result to k_RInfo ++ __ ld(k_RInfo, Address(sp, 0)); ++ __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target, /* is_far */ true); ++ // successful cast, fall thriugh to profile or jump ++ } ++} ++ ++void LIR_Assembler::profile_object(ciMethodData* md, ciProfileData* data, Register obj, ++ Register klass_RInfo, Label* obj_is_null) { ++ Label not_null; ++ __ bnez(obj, not_null); ++ // Object is null, update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr = __ form_address(mdo, /* base */ ++ md->byte_offset_of_slot(data, DataLayout::flags_offset()), /* offset */ ++ 12, /* expect offset bits */ ++ t1); /* temp reg */ ++ __ lbu(t0, data_addr); ++ __ ori(t0, t0, BitData::null_seen_byte_constant()); ++ __ sb(t0, data_addr); ++ __ j(*obj_is_null); ++ __ bind(not_null); ++} ++ ++void LIR_Assembler::typecheck_loaded(LIR_OpTypeCheck *op, ciKlass* k, Register k_RInfo) { ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md = NULL; ++ ciProfileData* data = NULL; ++ ++ const bool should_profile = op->should_profile(); ++ if (should_profile) { ++ data_check(op, &md, &data); ++ } ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ profile_object(md, data, obj, klass_RInfo, obj_is_null); ++ } else { ++ __ beqz(obj, *obj_is_null); ++ } ++ ++ typecheck_loaded(op, k, k_RInfo); ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(t0, obj); ++ __ bne(t0, k_RInfo, *failure_target, /* is_far */ true); ++ // successful cast, fall through to profile or jump ++ } else { ++ typecheck_helper_slowcheck(k, obj, Rtmp1, k_RInfo, klass_RInfo, failure_target, success_target); ++ } ++ if (should_profile) { ++ type_profile(obj, md, klass_RInfo, k_RInfo, data, success, failure, profile_cast_success, profile_cast_failure); ++ } ++ __ j(*success); ++} ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ typecheck_lir_store(op, should_profile); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ bind(success); ++ if (dst != obj) { ++ __ mv(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ bind(failure); ++ __ mv(dst, zr); ++ __ j(done); ++ __ bind(success); ++ __ mv(dst, 1); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register tmp1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(t1, newval); ++ newval = t1; ++ caswu(addr, newval, cmpval); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch (code) { ++ case lir_abs: __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr 
left, LIR_Opr right, LIR_Opr dst) { ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ int right_const = right->as_jint(); ++ if (Assembler::operand_valid_for_add_immediate(right_const)) { ++ logic_op_imm(Rdst, Rleft, right_const, code); ++ __ addw(Rdst, Rdst, zr); ++ } else { ++ __ mv(t0, right_const); ++ logic_op_reg32(Rdst, Rleft, t0, code); ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ logic_op_reg32(Rdst, Rleft, Rright, code); ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ long right_const = right->as_jlong(); ++ if (Assembler::operand_valid_for_add_immediate(right_const)) { ++ logic_op_imm(Rdst, Rleft, right_const, code); ++ } else { ++ __ mv(t0, right_const); ++ logic_op_reg(Rdst, Rleft, t0, code); ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); ++ logic_op_reg(Rdst, Rleft, Rright, code); ++ } ++ } ++} ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr src, LIR_Opr result, LIR_Op2* op) { ++ ShouldNotCallThis(); ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) { ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ __ float_cmp(true, is_unordered_less ? -1 : 1, ++ left->as_float_reg(), right->as_float_reg(), dst->as_register()); ++ } else if (left->is_double_fpu()) { ++ __ float_cmp(false, is_unordered_less ? 
-1 : 1, ++ left->as_double_reg(), right->as_double_reg(), dst->as_register()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_cmp_l2i) { ++ __ cmp_l2i(dst->as_register(), left->as_register_lo(), right->as_register_lo()); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::align_call(LIR_Code code) { } ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(Address(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ShouldNotReachHere(); } ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ __ emit_static_call_stub(); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() <= call_stub_size(), "stub too big"); ++ __ end_a_stub(); ++} ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == x10, "must match"); ++ assert(exceptionPC->as_register() == x13, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. ++ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { ++ // As no instructions have been generated yet for this LIR node it's ++ // possible that an oop map already exists for the current offset. ++ // In that case insert an dummy NOP here to ensure all oop map PCs ++ // are unique. See JDK-8237483. ++ __ nop(); ++ } ++ int pc_for_athrow_offset = __ offset(); ++ InternalAddress pc_for_athrow(__ pc()); ++ int32_t off = 0; ++ __ la_patchable(exceptionPC->as_register(), pc_for_athrow, off); ++ __ addi(exceptionPC->as_register(), exceptionPC->as_register(), off); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(x10); ++ // search an exception handler (x10: exception oop, x13: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); ++ __ nop(); ++} ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == x10, "must match"); ++ __ j(_unwind_handler_entry); ++} ++ ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dest_reg = dest->is_single_cpu() ? 
dest->as_register() : dest->as_register_lo(); ++ Register count_reg = count->as_register(); ++ if (dest->is_single_cpu()) { ++ assert (dest->type() == T_INT, "unexpected result type"); ++ assert (left->type() == T_INT, "unexpected left type"); ++ __ andi(t0, count_reg, 31); // should not shift more than 31 bits ++ switch (code) { ++ case lir_shl: __ sllw(dest_reg, left_reg, t0); break; ++ case lir_shr: __ sraw(dest_reg, left_reg, t0); break; ++ case lir_ushr: __ srlw(dest_reg, left_reg, t0); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (dest->is_double_cpu()) { ++ __ andi(t0, count_reg, 63); // should not shift more than 63 bits ++ switch (code) { ++ case lir_shl: __ sll(dest_reg, left_reg, t0); break; ++ case lir_shr: __ sra(dest_reg, left_reg, t0); break; ++ case lir_ushr: __ srl(dest_reg, left_reg, t0); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register left_reg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dest_reg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ if (dest->is_single_cpu()) { ++ assert (dest->type() == T_INT, "unexpected result type"); ++ assert (left->type() == T_INT, "unexpected left type"); ++ count &= 0x1f; ++ if (count != 0) { ++ switch (code) { ++ case lir_shl: __ slliw(dest_reg, left_reg, count); break; ++ case lir_shr: __ sraiw(dest_reg, left_reg, count); break; ++ case lir_ushr: __ srliw(dest_reg, left_reg, count); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ move_regs(left_reg, dest_reg); ++ } ++ } else if (dest->is_double_cpu()) { ++ count &= 0x3f; ++ if (count != 0) { ++ switch (code) { ++ case lir_shl: __ slli(dest_reg, left_reg, count); break; ++ case lir_shr: __ srai(dest_reg, left_reg, count); break; ++ case lir_ushr: __ srli(dest_reg, left_reg, count); break; ++ default: ShouldNotReachHere(); ++ } ++ } else { ++ move_regs(left->as_register_lo(), dest->as_register_lo()); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ j(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ bind(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ int bci = op->profiled_bci(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ guarantee(md != NULL, "Sanity"); ++ ciProfileData* data = md->bci_to_data(bci); ++ 
assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ increment(data_addr, DataLayout::counter_increment); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(t1, known_klass->constant_encoding()); ++ __ sd(t1, recv_addr); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ increment(data_addr, DataLayout::counter_increment); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ __ increment(counter_addr, DataLayout::counter_increment); ++ ++ __ bind(update_done); ++ } ++ } else { ++ // Static call ++ __ increment(counter_addr, DataLayout::counter_increment); ++ } ++} ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { Unimplemented(); } ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ la(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { Unimplemented(); } ++ ++void LIR_Assembler::check_conflict(ciKlass* exact_klass, intptr_t current_klass, ++ Register tmp, Label &next, Label &none, ++ Address mdo_addr) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ld(t1, mdo_addr); ++ __ xorr(tmp, tmp, t1); ++ __ andi(t0, tmp, TypeEntries::type_klass_mask); ++ // klass seen before, nothing to do. 
The unknown bit may have been ++ // set already but no need to check. ++ __ beqz(t0, next); ++ ++ // already unknown. Nothing to do anymore. ++ __ andi(t0, tmp, TypeEntries::type_unknown); ++ __ bnez(t0, next); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ beqz(t1, none); ++ __ mv(t0, (u1)TypeEntries::null_seen); ++ __ beq(t0, t1, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ __ membar(MacroAssembler::LoadLoad); ++ __ ld(t1, mdo_addr); ++ __ xorr(tmp, tmp, t1); ++ __ andi(t0, tmp, TypeEntries::type_klass_mask); ++ __ beqz(t0, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ld(tmp, mdo_addr); ++ // already unknown. Nothing to do anymore. ++ __ andi(t0, tmp, TypeEntries::type_unknown); ++ __ bnez(t0, next); ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ld(t1, mdo_addr); ++ __ ori(t1, t1, TypeEntries::type_unknown); ++ __ sd(t1, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ j(next); ++ ++ __ bind(none); ++ // first time here. Set profile type. ++ __ sd(tmp, mdo_addr); ++ } ++} ++ ++void LIR_Assembler::check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, ++ Address mdo_addr, Label &next) { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ld(t1, mdo_addr); ++ __ xorr(tmp, tmp, t1); ++ __ andi(t0, tmp, TypeEntries::type_klass_mask); ++ __ beqz(t0, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ld(t0, mdo_addr); ++ __ beqz(t0, ok); ++ __ mv(t1, (u1)TypeEntries::null_seen); ++ __ beq(t0, t1, ok); ++ // may have been set by another thread ++ __ membar(MacroAssembler::LoadLoad); ++ __ mov_metadata(t0, exact_klass->constant_encoding()); ++ __ ld(t1, mdo_addr); ++ __ xorr(t1, t0, t1); ++ __ andi(t1, t1, TypeEntries::type_mask); ++ __ beqz(t1, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ bind(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ sd(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ld(tmp, mdo_addr); ++ // already unknown. Nothing to do anymore. 
++ __ andi(t0, tmp, TypeEntries::type_unknown); ++ __ bnez(t0, next); ++ ++ __ ori(tmp, tmp, TypeEntries::type_unknown); ++ __ sd(tmp, mdo_addr); ++ } ++} ++ ++void LIR_Assembler::check_null(Register tmp, Label &update, intptr_t current_klass, ++ Address mdo_addr, bool do_update, Label &next) { ++ __ bnez(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ld(t1, mdo_addr); ++ __ ori(t1, t1, TypeEntries::null_seen); ++ __ sd(t1, mdo_addr); ++ } ++ if (do_update) { ++ __ j(next); ++ } ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert_different_registers(tmp, t0, t1, mdo_addr.base()); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ mv(tmp, obj); ++ } ++ if (do_null) { ++ check_null(tmp, update, current_klass, mdo_addr, do_update, next); ++#ifdef ASSERT ++ } else { ++ __ bnez(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ bind(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ check_exact_klass(tmp, exact_klass); ++ } ++#endif ++ if (!no_conflict) { ++ check_conflict(exact_klass, current_klass, tmp, next, none, mdo_addr); ++ } else { ++ check_no_conflict(exact_klass, current_klass, tmp, mdo_addr, next); ++ } ++ ++ __ bind(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++void LIR_Assembler::align_backward_branch_target() { } ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ negw(dest->as_register(), left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ neg(dest->as_register_lo(), left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fneg_s(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fneg_d(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++#endif ++ assert(patch_code == lir_patch_none, "Patch code not supported"); ++ LIR_Address* adr = addr->as_address_ptr(); ++ Register dst = dest->as_register_lo(); ++ ++ assert_different_registers(dst, t0); ++ if(adr->base()->is_valid() && dst == adr->base()->as_pointer_register() && 
(!adr->index()->is_cpu_register())) { ++ ++ intptr_t offset = adr->disp(); ++ LIR_Opr index_op = adr->index(); ++ int scale = adr->scale(); ++ if(index_op->is_constant()) { ++ offset += ((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale; ++ } ++ ++ if(!is_imm_in_range(offset, 12, 0)) { ++ __ la(t0, as_Address(adr)); ++ __ mv(dst, t0); ++ return; ++ } ++ } ++ __ la(dst, as_Address(adr)); ++} ++ ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb != NULL) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(dest), offset); ++ __ jalr(x1, t0, offset); ++ } ++ ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, /* pop_fpu_stack */ false, ++ /* unaligned */ false, /* wide */ false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ ++ Label ok; ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ bool is_unordered = false; ++ LIR_Condition cond = op->condition(); ++ emit_branch(cond, op->in_opr1(), op->in_opr2(), ok, /* is_far */ false, ++ /* is_unordered */(cond == lir_cond_greaterEqual || cond == lir_cond_greater) ? false : true); ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ bind(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++void LIR_Assembler::membar_acquire() { ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_release() { ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(MacroAssembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } ++ ++void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ mv(result_reg->as_register(), xthread); ++} ++ ++void LIR_Assembler::peephole(LIR_List *lir) {} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ ++ get_op(type); ++ ++ switch (code) { ++ case lir_xadd: ++ { ++ RegisterOrConstant inc; ++ Register tmp = as_reg(tmp_op); ++ Register dst = as_reg(dest); ++ if (data->is_constant()) { ++ inc = RegisterOrConstant(as_long(data)); ++ 
assert_different_registers(dst, addr.base(), tmp); ++ assert_different_registers(tmp, t0); ++ } else { ++ inc = RegisterOrConstant(as_reg(data)); ++ assert_different_registers(inc.as_register(), dst, addr.base(), tmp); ++ } ++ __ la(tmp, addr); ++ (_masm->*add)(dst, inc, tmp); ++ break; ++ } ++ case lir_xchg: ++ { ++ Register tmp = tmp_op->as_register(); ++ Register obj = as_reg(data); ++ Register dst = as_reg(dest); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(t0, obj); ++ obj = t0; ++ } ++ assert_different_registers(obj, addr.base(), tmp, dst); ++ __ la(tmp, addr); ++ (_masm->*xchg)(dst, obj, tmp); ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ if (addr->base()->is_illegal()) { ++ assert(addr->index()->is_illegal(), "must be illegal too"); ++ __ movptr(tmp, addr->disp()); ++ return Address(tmp, 0); ++ } ++ ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr index_op = addr->index(); ++ int scale = addr->scale(); ++ ++ if (index_op->is_illegal()) { ++ return Address(base, addr->disp()); ++ } else if (index_op->is_cpu_register()) { ++ Register index; ++ if (index_op->is_single_cpu()) { ++ index = index_op->as_register(); ++ } else { ++ index = index_op->as_register_lo(); ++ } ++ if (scale != 0) { ++ __ shadd(tmp, index, base, tmp, scale); ++ } else { ++ __ add(tmp, base, index); ++ } ++ return Address(tmp, addr->disp()); ++ } else if (index_op->is_constant()) { ++ intptr_t addr_offset = (((intptr_t)index_op->as_constant_ptr()->as_jint()) << scale) + addr->disp(); ++ return Address(base, addr_offset); ++ } ++ ++ Unimplemented(); ++ return Address(); ++} ++ ++// helper functions which checks for overflow and sets bailout if it ++// occurs. Always returns a valid embeddable pointer but in the ++// bailout case the pointer won't be to unique storage. 
++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::int_constant(jlong n) { ++ address const_addr = __ long_constant(n); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(addr, cmpval, newval, Assembler::int32, Assembler::aq /* acquire */, ++ Assembler::rl /* release */, t0, true /* result as bool */); ++ __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++void LIR_Assembler::caswu(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(addr, cmpval, newval, Assembler::uint32, Assembler::aq /* acquire */, ++ Assembler::rl /* release */, t0, true /* result as bool */); ++ __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(addr, cmpval, newval, Assembler::int64, Assembler::aq /* acquire */, ++ Assembler::rl /* release */, t0, true /* result as bool */); ++ __ seqz(t0, t0); // cmpxchg not equal, set t0 to 1 ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ far_call(RuntimeAddress(target)); ++ add_call_info_here(info); ++} ++ ++ ++void LIR_Assembler::check_exact_klass(Register tmp, ciKlass* exact_klass) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(t0, exact_klass->constant_encoding()); ++ __ beq(tmp, t0, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ bind(ok); ++} ++ ++void LIR_Assembler::get_op(BasicType type) { ++ switch (type) { ++ case T_INT: ++ xchg = &MacroAssembler::atomic_xchgalw; ++ add = &MacroAssembler::atomic_addalw; ++ break; ++ case T_LONG: ++ xchg = &MacroAssembler::atomic_xchgal; ++ add = &MacroAssembler::atomic_addal; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ xchg = &MacroAssembler::atomic_xchgalwu; ++ add = 
&MacroAssembler::atomic_addalw; ++ } else { ++ xchg = &MacroAssembler::atomic_xchgal; ++ add = &MacroAssembler::atomic_addal; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++// emit_opTypeCheck sub functions ++void LIR_Assembler::typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md = NULL; ++ ciProfileData* data = NULL; ++ ++ if (should_profile) { ++ data_check(op, &md, &data); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ profile_object(md, data, value, klass_RInfo, &done); ++ } else { ++ __ beqz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ lir_store_slowcheck(k_RInfo, klass_RInfo, Rtmp1, success_target, failure_target); ++ ++ // fall through to the success case ++ if (should_profile) { ++ Register mdo = klass_RInfo; ++ Register recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ j(done); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ ld(t1, counter_addr); ++ __ addi(t1, t1, -DataLayout::counter_increment); ++ __ sd(t1, counter_addr); ++ __ j(*stub->entry()); ++ } ++ ++ __ bind(done); ++} ++ ++void LIR_Assembler::type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, ++ ciProfileData* data, Label* success, Label* failure, ++ Label& profile_cast_success, Label& profile_cast_failure) { ++ Register mdo = klass_RInfo; ++ Register recv = k_RInfo; ++ __ bind(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ j(*success); ++ ++ __ bind(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr = __ form_address(mdo, /* base */ ++ md->byte_offset_of_slot(data, CounterData::count_offset()), /* offset */ ++ 12, /* expect offset bits */ ++ t1); /* temp reg */ ++ __ ld(t0, counter_addr); ++ __ addi(t0, t0, -DataLayout::counter_increment); ++ __ sd(t0, counter_addr); ++ __ j(*failure); ++} ++ ++void LIR_Assembler::lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1, ++ Label* success_target, Label* failure_target) { ++ // get instance klass (it's already uncompressed) ++ __ ld(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...) 
++ __ addi(sp, sp, -2 * wordSize); // 2: store k_RInfo and klass_RInfo ++ __ sd(klass_RInfo, Address(sp, wordSize)); // sub klass ++ __ sd(k_RInfo, Address(sp, 0)); // super klass ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ // load result to k_RInfo ++ __ ld(k_RInfo, Address(sp, 0)); ++ __ addi(sp, sp, 2 * wordSize); // 2: pop out k_RInfo and klass_RInfo ++ // result is a boolean ++ __ beqz(k_RInfo, *failure_target, /* is_far */ true); ++} ++ ++void LIR_Assembler::const2reg_helper(LIR_Opr src) { ++ switch (src->as_constant_ptr()->type()) { ++ case T_INT: ++ case T_ADDRESS: ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_METADATA: ++ const2reg(src, FrameMap::t0_opr, lir_patch_none, NULL); ++ break; ++ case T_LONG: ++ const2reg(src, FrameMap::t0_long_opr, lir_patch_none, NULL); ++ break; ++ case T_FLOAT: ++ case T_DOUBLE: ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op_reg32(Register dst, Register left, Register right, LIR_Code code) { ++ switch (code) { ++ case lir_logic_and: __ andrw(dst, left, right); break; ++ case lir_logic_or: __ orrw (dst, left, right); break; ++ case lir_logic_xor: __ xorrw(dst, left, right); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op_reg(Register dst, Register left, Register right, LIR_Code code) { ++ switch (code) { ++ case lir_logic_and: __ andr(dst, left, right); break; ++ case lir_logic_or: __ orr (dst, left, right); break; ++ case lir_logic_xor: __ xorr(dst, left, right); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op_imm(Register dst, Register left, int right, LIR_Code code) { ++ switch (code) { ++ case lir_logic_and: __ andi(dst, left, right); break; ++ case lir_logic_or: __ ori (dst, left, right); break; ++ case lir_logic_xor: __ xori(dst, left, right); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) { ++ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); ++ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; ++ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ sd(r, Address(sp, offset_from_rsp_in_bytes)); ++} ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) { ++ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); ++ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; ++ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ mv(t0, c); ++ __ sd(t0, Address(sp, offset_from_rsp_in_bytes)); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +new file mode 100644 +index 000000000..11a47fd6e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.hpp +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP ++#define CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++private: ++ ++#include "c1_LIRAssembler_arith_riscv.hpp" ++#include "c1_LIRAssembler_arraycopy_riscv.hpp" ++ ++ int array_element_size(BasicType type) const; ++ ++ static Register as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); ++ } ++ ++ Address as_Address(LIR_Address* addr, Register tmp); ++ ++ // Ensure we have a valid Address (base+offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, int adjust = 0); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ address int_constant(jlong n); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ++ ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval); ++ void caswu(Register addr, Register newval, Register cmpval); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum ++ { ++ // see emit_static_call_stub for detail: ++ // CompiledStaticCall::to_interp_stub_size() (14) + CompiledStaticCall::to_trampoline_stub_size() (1 + 3 + address) ++ _call_stub_size = 14 * NativeInstruction::instruction_size + ++ (NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size), ++ _call_aot_stub_size = 0, ++ // see emit_exception_handler for detail: ++ // verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY) ++ _exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller ++ // see emit_deopt_handler for detail ++ // auipc (1) + far_jump (6 or 2) ++ _deopt_handler_size = 1 * NativeInstruction::instruction_size + ++ 6 * NativeInstruction::instruction_size // or smaller ++ }; ++ ++ void check_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, ++ Label &next, Label &none, Address mdo_addr); ++ void check_no_conflict(ciKlass* exact_klass, intptr_t current_klass, Register tmp, Address mdo_addr, Label &next); ++ ++ void check_exact_klass(Register tmp, ciKlass* exact_klass); ++ void check_null(Register tmp, Label &update, intptr_t current_klass, Address mdo_addr, bool do_update, Label &next); ++ ++ void 
(MacroAssembler::*add)(Register prev, RegisterOrConstant incr, Register addr); ++ void (MacroAssembler::*xchg)(Register prev, Register newv, Register addr); ++ void get_op(BasicType type); ++ ++ // emit_typecheck_helper sub functions ++ void data_check(LIR_OpTypeCheck *op, ciMethodData **md, ciProfileData **data); ++ void typecheck_helper_slowcheck(ciKlass* k, Register obj, Register Rtmp1, ++ Register k_RInfo, Register klass_RInfo, ++ Label* failure_target, Label* success_target); ++ void profile_object(ciMethodData* md, ciProfileData* data, Register obj, ++ Register klass_RInfo, Label* obj_is_null); ++ void typecheck_loaded(LIR_OpTypeCheck* op, ciKlass* k, Register k_RInfo); ++ ++ // emit_opTypeCheck sub functions ++ void typecheck_lir_store(LIR_OpTypeCheck* op, bool should_profile); ++ ++ void type_profile(Register obj, ciMethodData* md, Register klass_RInfo, Register k_RInfo, ++ ciProfileData* data, Label* success, Label* failure, ++ Label& profile_cast_success, Label& profile_cast_failure); ++ ++ void lir_store_slowcheck(Register k_RInfo, Register klass_RInfo, Register Rtmp1, ++ Label* success_target, Label* failure_target); ++ ++ void const2reg_helper(LIR_Opr src); ++ ++ void emit_branch(LIR_Condition cmp_flag, LIR_Opr cmp1, LIR_Opr cmp2, Label& label, bool is_far, bool is_unordered); ++ ++ void logic_op_reg32(Register dst, Register left, Register right, LIR_Code code); ++ void logic_op_reg(Register dst, Register left, Register right, LIR_Code code); ++ void logic_op_imm(Register dst, Register left, int right, LIR_Code code); ++ ++public: ++ ++ void emit_cmove(LIR_Op4* op); ++ ++ void store_parameter(Register r, int offset_from_rsp_in_words); ++ void store_parameter(jint c, int offset_from_rsp_in_words); ++ ++#endif // CPU_RISCV_C1_LIRASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +new file mode 100644 +index 000000000..8ba9ed66d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +@@ -0,0 +1,1083 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r10_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r13_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r10_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::r10_opr; break; ++ case objectTag: opr = FrameMap::r10_oop_opr; break; ++ case longTag: opr = FrameMap::long10_opr; break; ++ case floatTag: opr = FrameMap::fpu10_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu10_double_opr; break; ++ ++ case addressTag: // fall through ++ default: ++ ShouldNotReachHere(); ++ return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++//--------- loading items into registers -------------------------------- ++ ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else if (v->type()->as_FloatConstant() != NULL) { ++ return jint_cast(v->type()->as_FloatConstant()->value()) == 0.0f; ++ } else if (v->type()->as_DoubleConstant() != NULL) { ++ return jlong_cast(v->type()->as_DoubleConstant()->value()) == 0.0; ++ } ++ return false; ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ int value = v->type()->as_IntConstant()->value(); ++ // "-value" must be defined for 
value may be used for sub ++ return Assembler::operand_valid_for_add_immediate(value) && ++ Assembler::operand_valid_for_add_immediate(- value); ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ long value = v->type()->as_LongConstant()->value(); ++ // "-value" must be defined for value may be used for sub ++ return Assembler::operand_valid_for_add_immediate(value) && ++ Assembler::operand_valid_for_add_immediate(- value); ++ } else if (v->type()->as_FloatConstant() != NULL) { ++ return v->type()->as_FloatConstant()->value() == 0.0f; ++ } else if (v->type()->as_DoubleConstant() != NULL) { ++ return v->type()->as_DoubleConstant()->value() == 0.0; ++ } ++ return false; ++} ++ ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { ++ if (c->as_constant() != NULL) { ++ long constant = 0; ++ switch (c->type()) { ++ case T_INT: constant = c->as_jint(); break; ++ case T_LONG: constant = c->as_jlong(); break; ++ default: return false; ++ } ++ // "-constant" must be defined for c may be used for sub ++ return Assembler::operand_valid_for_add_immediate(constant) && ++ Assembler::operand_valid_for_add_immediate(- constant); ++ } ++ return false; ++} ++ ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ jlong c; ++ if (constant->type() == T_INT) { ++ c = (jlong(index->as_jint()) << shift) + disp; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ c = (index->as_jlong() << shift) + disp; ++ } ++ if ((jlong)((jint)c) == c) { ++ return new LIR_Address(base, (jint)c, type); ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ return new LIR_Address(base, tmp, type); ++ } ++ } ++ ++ return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, ++ BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ return generate_address(array_opr, index_opr, shift, offset_in_bytes, type); ++} ++ ++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { ++ switch (type) { ++ case T_LONG: return LIR_OprFact::longConst(x); ++ case T_INT: return LIR_OprFact::intConst(x); ++ default: ShouldNotReachHere(); ++ } ++ return NULL; ++} ++ ++void LIRGenerator::increment_counter(address counter, BasicType type, int step) { ++ LIR_Opr pointer = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(counter), pointer); ++ LIR_Address* addr = new LIR_Address(pointer, type); ++ increment_counter(addr, step); ++} ++ ++void LIRGenerator::increment_counter(LIR_Address* addr, int step) { ++ LIR_Opr reg = new_register(addr->type()); ++ __ load(addr, reg); ++ __ add(reg, load_immediate(step, addr->type()), reg); ++ __ store(reg, addr); ++} ++ ++void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++ LIR_Opr reg = new_register(T_INT); ++ __ load(generate_address(base, disp, T_INT), reg, info); ++ __ cmp(condition, reg, LIR_OprFact::intConst(c)); ++} ++ ++void 
LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++ LIR_Opr reg1 = new_register(T_INT); ++ __ load(generate_address(base, disp, type), reg1, info); ++ __ cmp(condition, reg, reg1); ++} ++ ++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { ++ if (tmp->is_valid() && c > 0 && c < max_jint) { ++ if (is_power_of_2(c - 1)) { ++ __ shift_left(left, exact_log2(c - 1), tmp); ++ __ add(tmp, left, result); ++ return true; ++ } else if (is_power_of_2(c + 1)) { ++ __ shift_left(left, exact_log2(c + 1), tmp); ++ __ sub(tmp, left, result); ++ return true; ++ } ++ } ++ return false; ++} ++ ++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { ++ BasicType type = item->type(); ++ __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); ++} ++ ++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ++ ciMethod* profiled_method, int profiled_bci) { ++ LIR_Opr tmp1 = new_register(objectType); ++ LIR_Opr tmp2 = new_register(objectType); ++ LIR_Opr tmp3 = new_register(objectType); ++ __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); ++} ++ ++//---------------------------------------------------------------------- ++// visitor functions ++//---------------------------------------------------------------------- ++ ++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { ++ assert(x->is_pinned(), ""); ++ LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a tmp register for biased locking ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ tmp = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), tmp, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(), ""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++// neg ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate(from.result(), result); ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ ++ // float remainder is implemented as a direct call into the runtime ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = 
result_register_for(x->type()); ++ ++ left.load_item(); ++ __ move(left.result(), cc->at(0)); ++ right.load_item_force(cc->at(1)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ return; ++ } ++ ++ if (!left.is_register()) { ++ left.load_item(); ++ } ++ // Always load right hand side. ++ right.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ ++ left.load_item(); ++ ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jlong c = right.get_jlong_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) { need_zero_check = false; } ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c)) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ } ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem(left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div(left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (!right.is_register()) { ++ if (x->op() == Bytecodes::_lmul || ++ !right.is_constant() || ++ (x->op() == Bytecodes::_ladd && ++ !Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || ++ (x->op() == Bytecodes::_lsub && ++ !Assembler::operand_valid_for_add_immediate(-right.get_jlong_constant()))) { ++ right.load_item(); ++ } else { // add, sub ++ assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expected ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ 
left_arg->load_item(); ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ ++ rlock_result(x); ++ ++ bool need_zero_check = true; ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ // no need to do div-by-zero check if the divisor is a non-zero constant ++ if (c != 0) { need_zero_check = false; } ++ // do not load right if the divisor is a power-of-2 constant ++ if (c > 0 && is_power_of_2(c)) { ++ right_arg->dont_load_item(); ++ } else { ++ right_arg->load_item(); ++ } ++ } else { ++ right_arg->load_item(); ++ } ++ if (need_zero_check) { ++ CodeEmitInfo* info = state_for(x); ++ __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, right.result()->type(), new DivByZeroStub(info)); ++ } ++ ++ LIR_Opr ill = LIR_OprFact::illegalOpr; ++ ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); ++ } ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() && ++ ((x->op() == Bytecodes::_iadd && !Assembler::operand_valid_for_add_immediate(right.get_jint_constant())) || ++ (x->op() == Bytecodes::_isub && !Assembler::operand_valid_for_add_immediate(-right.get_jint_constant())))) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
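The power-of-two tests above feed the shift-and-add rewrite performed by strength_reduce_multiply earlier in this file. As a standalone sketch (plain C++; mul_by_constant, is_pow2 and log2i are illustrative stand-ins, not the HotSpot helpers or code from the port), a multiply by a constant of the form 2^k + 1 or 2^k - 1 reduces to a shift plus an add or a sub:

#include <cstdint>

// Stand-ins for HotSpot's is_power_of_2 / exact_log2, for illustration only.
static bool is_pow2(int64_t v) { return v > 0 && (v & (v - 1)) == 0; }
static int  log2i(int64_t v)   { int n = -1; while (v != 0) { v >>= 1; n++; } return n; }

// e.g. x * 9 -> (x << 3) + x and x * 7 -> (x << 3) - x; otherwise keep the multiply.
static int64_t mul_by_constant(int64_t x, int64_t c) {
  if (is_pow2(c - 1)) { return (x << log2i(c - 1)) + x; }  // c == 2^k + 1
  if (is_pow2(c + 1)) { return (x << log2i(c + 1)) - x; }  // c == 2^k - 1
  return x * c;                                            // no reduction applies
}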
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ default: ShouldNotReachHere(); return; ++ } ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ LIRItem value(x->x(), this); ++ LIRItem count(x->y(), this); ++ ++ value.load_item(); ++ if (count.is_constant()) { ++ assert(count.type()->as_IntConstant() != NULL || count.type()->as_LongConstant() != NULL , "should be"); ++ count.dont_load_item(); ++ } else { ++ count.load_item(); ++ } ++ ++ LIR_Opr res = rlock_result(x); ++ shift_op(x->op(), res, value.result(), count.result(), LIR_OprFact::illegalOpr); ++} ++ ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ rlock_result(x); ++ ValueTag tag = right.type()->tag(); ++ if(right.is_constant() && ++ ((tag == longTag && Assembler::operand_valid_for_add_immediate(right.get_jlong_constant())) || ++ (tag == intTag && Assembler::operand_valid_for_add_immediate(right.get_jint_constant())))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ ++ switch (x->op()) { ++ case Bytecodes::_iand: // fall through ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: // fall through ++ case Bytecodes::_lor: ++ __ logical_or(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: // fall through ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (type == T_OBJECT || type == T_ARRAY) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); ++ } else if (type == 
T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ logical_xor(FrameMap::r5_opr, LIR_OprFact::intConst(1), result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), ++ "wrong type"); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: // fall through ++ case vmIntrinsics::_dlog: // fall through ++ case vmIntrinsics::_dpow: // fall through ++ case vmIntrinsics::_dcos: // fall through ++ case vmIntrinsics::_dsin: // fall through ++ case vmIntrinsics::_dtan: // fall through ++ case vmIntrinsics::_dlog10: ++ do_LibmIntrinsic(x); ++ break; ++ case vmIntrinsics::_dabs: // fall through ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: { ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ } ++ case vmIntrinsics::_dabs: { ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ CallingConvention* cc = NULL; ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ if (x->id() == vmIntrinsics::_dpow) { signature.append(T_DOUBLE); } ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ value1.set_destroys_register(); ++ value1.load_item_force(cc->at(1)); ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { __ 
call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); } ++ else { __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. 
++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type = NULL; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, ++ expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ ShouldNotReachHere(); ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ ShouldNotReachHere(); ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA instructions support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break; ++ case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ++ __ convert(x->op(), conv_input, conv_result); ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::r12_oop_opr, ++ FrameMap::r15_oop_opr, ++ FrameMap::r14_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::r13_metadata_opr, ++ info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::r9_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::r12_oop_opr; ++ LIR_Opr tmp2 = FrameMap::r14_oop_opr; ++ LIR_Opr tmp3 = FrameMap::r15_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::r13_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ 
++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::r12_oop_opr; ++ LIR_Opr tmp2 = FrameMap::r14_oop_opr; ++ LIR_Opr tmp3 = FrameMap::r15_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::r13_metadata_opr; ++ ++ length.load_item_force(FrameMap::r9_opr); ++ LIR_Opr len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). 
++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i * BytesPerInt)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::r10_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::r9_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::r12_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || ++ (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/ )); ++ ++ CodeStub* stub = NULL; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, ++ info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ // 
If should have two successors ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ yin->load_item(); ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ // Generate branch profiling. Profiling code doesn't kill flags. ++ __ cmp(lir_cond(cond), left, right); ++ profile_branch(x, cond); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); ++ } else { ++ __ branch(lir_cond(cond), right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(xthread); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ if (!UseBarriersForVolatile) { ++ __ membar(); ++ } ++ ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +new file mode 100644 +index 000000000..00e33e882 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LIR_riscv.cpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. ++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { ++ assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform"); ++ return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | ++ (reg1 << LIR_OprDesc::reg2_shift) | ++ LIR_OprDesc::double_type | ++ LIR_OprDesc::fpu_register | ++ LIR_OprDesc::double_size); ++} ++ ++#ifndef PRODUCT ++void LIR_Address::verify() const { ++ assert(base()->is_cpu_register(), "wrong base operand"); ++ assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); ++ assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, ++ "wrong type for addresses"); ++} ++#endif // PRODUCT +diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp +new file mode 100644 +index 000000000..60dcdc0e1 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp +@@ -0,0 +1,33 @@ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LinearScan.hpp" ++#include "utilities/bitMap.inline.hpp" ++ ++void LinearScan::allocate_fpu_stack() { ++ // No FPU stack on RISCV ++} +diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp +new file mode 100644 +index 000000000..f0aa08a39 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.hpp +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_LINEARSCAN_RISCV_HPP ++#define CPU_RISCV_C1_LINEARSCAN_RISCV_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) ++{ ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) { ++ return true; ++ } ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg_1) { ++ return true; ++ } ++ if (assigned_reg > pd_last_callee_saved_fpu_reg_1 && assigned_reg < pd_first_callee_saved_fpu_reg_2) { ++ return true; ++ } ++ if (assigned_reg > pd_last_callee_saved_fpu_reg_2 && assigned_reg < pd_last_fpu_reg) { ++ return true; ++ } ++ return false; ++} ++ ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) { ++} ++ ++ ++// Implementation of LinearScanWalker ++ ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) ++{ ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || ++ cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++ ++#endif // CPU_RISCV_C1_LINEARSCAN_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +new file mode 100644 +index 000000000..370ec45c6 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +@@ -0,0 +1,441 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, ++ FloatRegister freg0, FloatRegister freg1, ++ Register result) ++{ ++ if (is_float) { ++ float_compare(result, freg0, freg1, unordered_result); ++ } else { ++ double_compare(result, freg0, freg1, unordered_result); ++ } ++} ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { ++ const int aligned_mask = BytesPerWord - 1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ int null_check_offset = -1; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ sd(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(tmp != noreg, "should have tmp register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, tmp, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ld(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ ori(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ sd(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. 
unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ la(t1, Address(obj, hdr_offset)); ++ cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthough*/NULL); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr -sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ sub(hdr, hdr, sp); ++ mv(t0, aligned_mask - os::vm_page_size()); ++ andr(hdr, hdr, t0); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ sd(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ bnez(hdr, slow_case, /* is_far */ true); ++ bind(done); ++ if (PrintBiasedLockingStatistics) { ++ la(t1, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ incrementw(Address(t1, 0)); ++ } ++ return null_check_offset; ++} ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord - 1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ld(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ beqz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ la(t0, Address(obj, hdr_offset)); ++ cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case); ++ } else { ++ cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case); ++ } ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, /* is_far */ true); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, /* is_far */ true); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, tmp1, tmp2); ++ ld(tmp1, Address(klass, 
Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ mv(tmp1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ sd(tmp1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(tmp1, klass); ++ sw(tmp1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, zr); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ sub(len_in_bytes, len_in_bytes, hdr_size_in_bytes); ++ beqz(len_in_bytes, done); ++ ++ // Preserve obj ++ if (hdr_size_in_bytes) { ++ add(obj, obj, hdr_size_in_bytes); ++ } ++ zero_memory(obj, len_in_bytes, tmp1); ++ if (hdr_size_in_bytes) { ++ sub(obj, obj, hdr_size_in_bytes); ++ } ++ ++ bind(done); ++} ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, tmp1, tmp2); ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, tmp1, tmp2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, tmp1, tmp2, UseTLAB); ++} ++ ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, tmp1, tmp2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = tmp2; ++ // 16: multiplier for threshold ++ const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) ++ if (var_size_in_bytes != noreg) { ++ mv(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, tmp1); ++ } else if (con_size_in_bytes <= threshold) { ++ // use explicit null stores ++ int i = hdr_size_in_bytes; ++ if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { // 2: multiplier for BytesPerWord ++ sd(zr, Address(obj, i)); ++ i += BytesPerWord; ++ } ++ for (; i < con_size_in_bytes; i += BytesPerWord) { ++ sd(zr, Address(obj, i)); ++ } ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ block_comment("zero memory"); ++ // use loop to null out the fields ++ int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; ++ mv(index, words / 8); // 8: unroll factor (matches 'unroll' below) ++ ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll ++ int remainder = words % unroll; ++ la(t0, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); ++ ++ Label entry_point, loop; ++ j(entry_point); ++ ++ bind(loop); ++ sub(index, index, 1); ++ for (int i = -unroll; i < 0; i++) { ++ if (-i == remainder) { ++ bind(entry_point); ++ } ++ sd(zr, Address(t0, i 
* wordSize)); ++ } ++ if (remainder == 0) { ++ bind(entry_point); ++ } ++ add(t0, t0, unroll * wordSize); ++ bnez(index, loop); ++ } ++ } ++ ++ membar(MacroAssembler::StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == x10, "must be"); ++ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, tmp1, tmp2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ mv(t0, (int32_t)max_array_allocation_length); ++ bgeu(len, t0, slow_case, /* is_far */ true); ++ ++ const Register arr_size = tmp2; // okay to be the same ++ // align object end ++ mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ shadd(arr_size, len, arr_size, t0, f); ++ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); ++ ++ try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case); ++ ++ initialize_header(obj, klass, len, tmp1, tmp2); ++ ++ // clear rest of allocated space ++ const Register len_zero = len; ++ initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); ++ ++ membar(MacroAssembler::StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == x10, "must be"); ++ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); ++ } ++ ++ verify_oop(obj); ++} ++ ++void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache, Label &L) { ++ verify_oop(receiver); ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); ++ cmp_klass(receiver, iCache, t0, L); ++} ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a J, JAL or NOP. ++ // Make it a NOP. ++ nop(); ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before doing an enter(). ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize + 2 * wordSize); // 2: multiplier for wordSize (saved fp and return address) ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize + 2 * wordSize); // 2: multiplier for wordSize (saved fp and return address) ++} ++ ++ ++void C1_MacroAssembler::verified_entry() { ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // fp + -2: link ++ // + -1: return address ++ // + 0: argument with offset 0 ++ // + 1: argument with offset 1 ++ // + 2: ... 
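To make the word offsets in the comment above concrete, here is a minimal sketch of the address computation the load below performs; c1_argument_address is a hypothetical helper and BytesPerWord == 8 is an RV64 assumption, so this is illustrative only:

#include <cstdint>

// C1 stub frame layout described above (offsets in words relative to fp):
//   fp - 2 words     : saved fp (link)
//   fp - 1 word      : return address
//   fp + 0, + 1, ... : arguments with offsets 0, 1, ...
static inline uint8_t* c1_argument_address(uint8_t* fp, int offset_in_words) {
  const int BytesPerWord = 8;  // RV64 assumption
  return fp + offset_in_words * BytesPerWord;
}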
++ ld(reg, Address(fp, offset_in_words * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++ ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) { ++ return; ++ } ++ verify_oop_addr(Address(sp, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ bnez(r, not_null); ++ stop("non-null oop required"); ++ bind(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_x10, bool inv_x9, bool inv_x12, bool inv_x13, bool inv_x14, bool inv_x15) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_x10) { mv(x10, 0xDEAD); } ++ if (inv_x9) { mv(x9, 0xDEAD); } ++ if (inv_x12) { mv(x12, nn++); } ++ if (inv_x13) { mv(x13, 0xDEAD); } ++ if (inv_x14) { mv(x14, 0xDEAD); } ++ if (inv_x15) { mv(x15, 0xDEAD); } ++#endif // ASSERT ++} ++#endif // ifndef PRODUCT ++ ++typedef void (C1_MacroAssembler::*c1_cond_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (C1_MacroAssembler::*c1_float_cond_branch_insn)(FloatRegister op1, FloatRegister op2, ++ Label& label, bool is_far, bool is_unordered); ++ ++static c1_cond_branch_insn c1_cond_branch[] = ++{ ++ /* SHORT branches */ ++ (c1_cond_branch_insn)&Assembler::beq, ++ (c1_cond_branch_insn)&Assembler::bne, ++ (c1_cond_branch_insn)&Assembler::blt, ++ (c1_cond_branch_insn)&Assembler::ble, ++ (c1_cond_branch_insn)&Assembler::bge, ++ (c1_cond_branch_insn)&Assembler::bgt, ++ (c1_cond_branch_insn)&Assembler::bleu, // lir_cond_belowEqual ++ (c1_cond_branch_insn)&Assembler::bgeu // lir_cond_aboveEqual ++}; ++ ++static c1_float_cond_branch_insn c1_float_cond_branch[] = ++{ ++ /* FLOAT branches */ ++ (c1_float_cond_branch_insn)&MacroAssembler::float_beq, ++ (c1_float_cond_branch_insn)&MacroAssembler::float_bne, ++ (c1_float_cond_branch_insn)&MacroAssembler::float_blt, ++ (c1_float_cond_branch_insn)&MacroAssembler::float_ble, ++ (c1_float_cond_branch_insn)&MacroAssembler::float_bge, ++ (c1_float_cond_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // lir_cond_belowEqual ++ NULL, // lir_cond_aboveEqual ++ ++ /* DOUBLE branches */ ++ (c1_float_cond_branch_insn)&MacroAssembler::double_beq, ++ (c1_float_cond_branch_insn)&MacroAssembler::double_bne, ++ (c1_float_cond_branch_insn)&MacroAssembler::double_blt, ++ (c1_float_cond_branch_insn)&MacroAssembler::double_ble, ++ (c1_float_cond_branch_insn)&MacroAssembler::double_bge, ++ (c1_float_cond_branch_insn)&MacroAssembler::double_bgt, ++ NULL, // lir_cond_belowEqual ++ NULL // lir_cond_aboveEqual ++}; ++ ++void C1_MacroAssembler::c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, ++ BasicType type, bool is_far) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ assert(cmpFlag == lir_cond_equal || cmpFlag == lir_cond_notEqual, "Should be equal or notEqual"); ++ if (cmpFlag == lir_cond_equal) { ++ oop_equal(op1, op2, label, is_far); ++ } else { ++ oop_nequal(op1, op2, label, is_far); ++ } ++ } else { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(c1_cond_branch) / sizeof(c1_cond_branch[0])), ++ "invalid c1 conditional branch index"); ++ (this->*c1_cond_branch[cmpFlag])(op1, op2, label, is_far); ++ } ++} ++ ++void C1_MacroAssembler::c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered) { ++ assert(cmpFlag >= 0 && ++ cmpFlag < (int)(sizeof(c1_float_cond_branch) / sizeof(c1_float_cond_branch[0])), ++ "invalid c1 float conditional branch index"); ++ 
(this->*c1_float_cond_branch[cmpFlag])(op1, op2, label, is_far, is_unordered); ++} +diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +new file mode 100644 +index 000000000..5d0cefe89 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp +@@ -0,0 +1,121 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register tmp1, Register tmp2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register tmp1); ++ ++ void float_cmp(bool is_float, int unordered_result, ++ FloatRegister f0, FloatRegister f1, ++ Register result); ++ ++ // locking ++ // hdr : must be x10, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // tmp : temporary register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be x10 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object 
klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : temp registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register tmp1, Register tmp2, int header_size, int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : temp register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ ++ void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++ void inline_cache_check(Register receiver, Register iCache, Label &L); ++ ++ static const int c1_double_branch_mask = 1 << 3; // depend on c1_float_cond_branch ++ void c1_cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, BasicType type, bool is_far); ++ void c1_float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered = false); ++ ++#endif // CPU_RISCV_C1_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +new file mode 100644 +index 000000000..f06e7b51c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_Runtime1_riscv.cpp +@@ -0,0 +1,1206 @@ ++/* ++ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_riscv.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_riscv.inline.hpp" ++ ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, ++ "registers must be different"); ++ assert(oop_result1 != xthread && metadata_result != xthread, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ mv(c_rarg0, xthread); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(sp, fp, retaddr, t0); ++ ++ // do the call ++ int32_t off = 0; ++ la_patchable(t0, RuntimeAddress(entry), off); ++ jalr(x1, t0, off); ++ bind(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ push_reg(x10, sp); ++ { Label L; ++ get_thread(x10); ++ beq(xthread, x10, L); ++ stop("StubAssembler::call_RT: xthread not callee saved?"); ++ bind(L); ++ } ++ pop_reg(x10, sp); ++#endif ++ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ } ++ bind(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, xthread); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, xthread); ++ } ++ return call_offset; ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++ mv(c_rarg1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++int 
StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++ const int arg_num = 2; ++ if (c_rarg1 == arg2) { ++ if (c_rarg2 == arg1) { ++ xorr(arg1, arg1, arg2); ++ xorr(arg2, arg1, arg2); ++ xorr(arg1, arg1, arg2); ++ } else { ++ mv(c_rarg2, arg2); ++ mv(c_rarg1, arg1); ++ } ++ } else { ++ mv(c_rarg1, arg1); ++ mv(c_rarg2, arg2); ++ } ++ return call_RT(oop_result1, metadata_result, entry, arg_num); ++} ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { ++ const int arg_num = 3; ++ // if there is any conflict use the stack ++ if (arg1 == c_rarg2 || arg1 == c_rarg3 || ++ arg2 == c_rarg1 || arg2 == c_rarg3 || ++ arg3 == c_rarg1 || arg3 == c_rarg2) { ++ const int arg1_sp_offset = 0; ++ const int arg2_sp_offset = 1; ++ const int arg3_sp_offset = 2; ++ addi(sp, sp, -(arg_num * wordSize)); ++ sd(arg3, Address(sp, arg3_sp_offset * wordSize)); ++ sd(arg2, Address(sp, arg2_sp_offset * wordSize)); ++ sd(arg1, Address(sp, arg1_sp_offset * wordSize)); ++ ++ ld(c_rarg1, Address(sp, arg1_sp_offset * wordSize)); ++ ld(c_rarg2, Address(sp, arg2_sp_offset * wordSize)); ++ ld(c_rarg3, Address(sp, arg3_sp_offset * wordSize)); ++ addi(sp, sp, arg_num * wordSize); ++ } else { ++ mv(c_rarg1, arg1); ++ mv(c_rarg2, arg2); ++ mv(c_rarg3, arg3); ++ } ++ return call_RT(oop_result1, metadata_result, entry, arg_num); ++} ++ ++// Implementation of StubFrame ++ ++class StubFrame: public StackObj { ++ private: ++ StubAssembler* _sasm; ++ ++ public: ++ StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); ++ void load_argument(int offset_in_words, Register reg); ++ ++ ~StubFrame(); ++};; ++ ++void StubAssembler::prologue(const char* name, bool must_gc_arguments) { ++ set_info(name, must_gc_arguments); ++ enter(); ++} ++ ++void StubAssembler::epilogue() { ++ leave(); ++ ret(); ++} ++ ++#define __ _sasm-> ++ ++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { ++ _sasm = sasm; ++ __ prologue(name, must_gc_arguments); ++} ++ ++// load parameters that were stored with LIR_Assembler::store_parameter ++// Note: offsets for store_parameter and load_argument must match ++void StubFrame::load_argument(int offset_in_words, Register reg) { ++ __ load_parameter(offset_in_words, reg); ++} ++ ++ ++StubFrame::~StubFrame() { ++ __ epilogue(); ++ _sasm = NULL; ++} ++ ++#undef __ ++ ++ ++// Implementation of Runtime1 ++ ++#define __ sasm-> ++ ++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; ++ ++// Stack layout for saving/restoring all the registers needed during a runtime ++// call (this includes deoptimization) ++// Note: note that users of this frame may well have arguments to some runtime ++// while these values are on the stack. These positions neglect those arguments ++// but the code in save_live_registers will take the argument count into ++// account. ++// ++ ++enum reg_save_layout { ++ reg_save_frame_size = 32 /* float */ + 30 /* integer excluding x3, x4 */ ++}; ++ ++// Save off registers which might be killed by calls into the runtime. ++// Tries to smart of about FP registers. In particular we separate ++// saving and describing the FPU registers for deoptimization since we ++// have to save the FPU registers twice if we describe them. The ++// deopt blob is the only thing which needs to describe FPU registers. 
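The two-register variant of call_RT above has to cope with the case where the incoming arguments already sit in each other's C ABI registers (arg1 in c_rarg2 and arg2 in c_rarg1); it exchanges them in place with three xorr instructions so no scratch register is needed. A minimal standalone sketch of the same identity in plain C++ follows; xor_swap and the uint64_t values are illustrative stand-ins, not part of the patch.

    #include <cassert>
    #include <cstdint>

    // In-place exchange without a temporary, mirroring the xorr/xorr/xorr
    // sequence used when arg1 and arg2 occupy each other's target registers.
    // Precondition (as in the stub): the two locations are distinct.
    static void xor_swap(uint64_t& a, uint64_t& b) {
      a ^= b;  // a == a0 ^ b0
      b ^= a;  // b == b0 ^ (a0 ^ b0) == a0
      a ^= b;  // a == (a0 ^ b0) ^ a0 == b0
    }

    int main() {
      uint64_t arg1 = 0x1234, arg2 = 0x5678;
      xor_swap(arg1, arg2);
      assert(arg1 == 0x5678 && arg2 == 0x1234);
      return 0;
    }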
++// In all other cases it should be sufficient to simply save their ++// current value. ++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_map != NULL); ++ ++ // cpu_regs, caller save registers only, see FrameMap::initialize ++ // in c1_FrameMap_riscv.cpp for detail. ++ const static Register caller_save_cpu_regs[FrameMap::max_nof_caller_save_cpu_regs] = {x7, x10, x11, x12, ++ x13, x14, x15, x16, x17, ++ x28, x29, x30, x31}; ++ for (int i = 0; i < FrameMap::max_nof_caller_save_cpu_regs; i++) { ++ Register r = caller_save_cpu_regs[i]; ++ int sp_offset = cpu_reg_save_offsets[r->encoding()]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), ++ r->as_VMReg()); ++ } ++ ++ // fpu_regs ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), ++ r->as_VMReg()); ++ } ++ } ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ // if the number of pushed regs is odd, one slot will be reserved for alignment ++ __ push_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ ++ if (save_fpu_registers) { ++ // float registers ++ __ addi(sp, sp, -(FrameMap::nof_fpu_regs * wordSize)); ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ __ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ } else { ++ // we define reg_save_layout = 62 as the fixed frame size, ++ // we should also sub 32 * wordSize to sp when save_fpu_registers == false ++ __ addi(sp, sp, -32 * wordSize); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); ++ } else { ++ // we define reg_save_layout = 64 as the fixed frame size, ++ // we should also add 32 * wordSize to sp when save_fpu_registers == false ++ __ addi(sp, sp, 32 * wordSize); ++ } ++ ++ // if the number of popped regs is odd, the reserved slot for alignment will be removed ++ __ pop_reg(RegSet::range(x5, x31), sp); // integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) ++} ++ ++static void restore_live_registers_except_r10(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ __ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ __ addi(sp, sp, FrameMap::nof_fpu_regs * wordSize); ++ } else { ++ // we define reg_save_layout = 64 as the fixed frame size, ++ // we should also add 32 * wordSize to sp when save_fpu_registers == false ++ __ addi(sp, sp, 32 * wordSize); ++ } ++ ++ // pop integer registers except ra(x1) & sp(x2) & gp(x3) & tp(x4) & x10 ++ // there is one reserved slot for alignment on 
the stack in save_live_registers(). ++ __ pop_reg(RegSet::range(x5, x9), sp); // pop x5 ~ x9 with the reserved slot for alignment ++ __ pop_reg(RegSet::range(x11, x31), sp); // pop x11 ~ x31; x10 will be automatically skipped here ++} ++ ++void Runtime1::initialize_pd() { ++ int i = 0; ++ int sp_offset = 0; ++ const int step = 2; // SP offsets are in halfwords ++ ++ // all float registers are saved explicitly ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += step; ++ } ++ ++ // a slot reserved for stack 16-byte alignment, see MacroAssembler::push_reg ++ sp_offset += step; ++ // we save x5 ~ x31, except x0 ~ x4: loop starts from x5 ++ for (i = 5; i < FrameMap::nof_cpu_regs; i++) { ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += step; ++ } ++} ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in t0 and t1) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* oop_map = save_live_registers(sasm); ++ assert_cond(oop_map != NULL); ++ int call_offset = 0; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ mv(c_rarg1, t0); ++ __ mv(c_rarg2, t1); ++ call_offset = __ call_RT(noreg, noreg, target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ __ should_not_reach_here(); ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = x10; ++ const Register exception_pc = x13; ++ ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* oop_map = NULL; ++ ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /* thread */); ++ ++ // load and clear pending exception oop into x10 ++ __ ld(exception_oop, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into x13 ++ __ ld(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ __ sd(zr, Address(xthread, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (x10) and ++ // exception pc (ra) are dead. 
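Runtime1::initialize_pd() above records the save-area offsets in VMReg slot (halfword) units, two slots per 64-bit register: f0..f31 first, then one padding word (push_reg rounds the odd count of integer registers up for 16-byte alignment), then x5..x31. The following is a small self-contained sketch of that bookkeeping; the register counts are assumptions copied from the surrounding code, and the printed slot numbers only illustrate where the first and last registers land.

    #include <cstdio>

    int main() {
      const int step = 2;            // VMReg slots per saved 64-bit register
      const int nof_fpu_regs = 32;   // assumption: matches FrameMap::nof_fpu_regs
      const int nof_cpu_regs = 32;   // assumption: matches FrameMap::nof_cpu_regs
      int fpu_off[32] = {0};
      int cpu_off[32] = {0};

      int sp_offset = 0;
      for (int i = 0; i < nof_fpu_regs; i++) {   // f0..f31
        fpu_off[i] = sp_offset;
        sp_offset += step;
      }
      sp_offset += step;                         // padding word for 16-byte alignment
      for (int i = 5; i < nof_cpu_regs; i++) {   // x5..x31 (x0..x4 are not saved)
        cpu_off[i] = sp_offset;
        sp_offset += step;
      }

      std::printf("f0 -> slot %d, f31 -> slot %d\n", fpu_off[0], fpu_off[31]);  // 0 and 62
      std::printf("x5 -> slot %d, x31 -> slot %d\n", cpu_off[5], cpu_off[31]);  // 66 and 118
      return 0;
    }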
++ const int frame_size = 2 /* fp, return address */; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ++ __ should_not_reach_here(); ++ break; ++ } ++ ++ // verify that only x10 and x13 are valid at this time ++ __ invalidate_registers(false, true, true, false, true, true); ++ // verify that x10 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop already set"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc already set"); ++ __ bind(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ sd(exception_oop, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(exception_pc, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ sd(exception_pc, Address(fp, frame::return_addr_offset * BytesPerWord)); ++ ++ // compute the exception handler. ++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ guarantee(oop_map != NULL, "NULL oop_map!"); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // x10: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. ++ ++ // only x10 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ sd(x10, Address(fp, frame::return_addr_offset * BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(); // jump to exception handler ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = x10; ++ // other registers used in this stub ++ const Register handler_addr = x11; ++ ++ // verify that only x10, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_oop_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. 
We also ++ // save exception_oop ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(exception_oop, Address(sp, wordSize)); ++ __ sd(ra, Address(sp)); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), xthread, ra); ++ // x10: exception handler address of the caller ++ ++ // Only x10 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ mv(handler_addr, x10); ++ ++ // get throwing pc (= return address). ++ // ra has been destroyed by the call ++ __ ld(ra, Address(sp)); ++ __ ld(exception_oop, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ __ mv(x13, ra); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) ++ // x10: exception oop ++ // x13: throwing pc ++ // x11: exception handler ++ __ jr(handler_addr); ++} ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ assert_cond(oop_map != NULL); ++ ++ __ mv(c_rarg0, xthread); ++ Label retaddr; ++ __ set_last_Java_frame(sp, fp, retaddr, t0); ++ // do the call ++ int32_t off = 0; ++ __ la_patchable(t0, RuntimeAddress(target), off); ++ __ jalr(x1, t0, off); ++ __ bind(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ stop("StubAssembler::call_RT: xthread not callee saved?"); ++ __ bind(L); ++ } ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // check for pending exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // exception pending => remove activation and forward to exception handler ++ ++ { Label L1; ++ __ bnez(x10, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ bind(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. 
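The step the comment above describes is a field-to-field handoff on the thread: the pending exception set by the runtime call is moved, together with the throwing pc, into the dedicated exception_oop/exception_pc slots that the deopt blob reads, and the pending slot is cleared. A rough sketch of that dataflow follows; ThreadState, its field names, and hand_off_pending_exception are illustrative stand-ins for this example, not HotSpot's real layout.

    #include <cassert>

    // Illustrative stand-in for the thread-local exception state.
    struct ThreadState {
      void* pending_exception;  // set by the runtime when a Java exception is raised
      void* exception_oop;      // read by the deopt blob / exception handler
      void* exception_pc;       // throwing pc, also read by the deopt blob
    };

    // Mirrors the "copy and clear pending exception" step: move the pending
    // exception (plus the throwing pc taken from the caller frame) into the
    // dedicated fields and clear the pending slot.
    static void hand_off_pending_exception(ThreadState* t, void* throwing_pc) {
      void* ex = t->pending_exception;
      t->pending_exception = nullptr;
      assert(ex != nullptr);          // analogous to verify_not_null_oop
      t->exception_oop = ex;
      t->exception_pc  = throwing_pc;
    }

    int main() {
      int dummy_oop = 0, dummy_pc = 0;
      ThreadState t = { &dummy_oop, nullptr, nullptr };
      hand_off_pending_exception(&t, &dummy_pc);
      assert(t.pending_exception == nullptr);
      assert(t.exception_oop == &dummy_oop && t.exception_pc == &dummy_pc);
      return 0;
    }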
++ ++ // load and clear pending exception ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(x10); ++ ++ // load throwing pc: this is the return address of the stub ++ __ ld(x13, Address(fp, wordSize)); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ bind(oop_empty); ++ ++ Label pc_empty; ++ __ ld(t0, Address(xthread, JavaThread::exception_pc_offset())); ++ __ beqz(t0, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ bind(pc_empty); ++#endif ++ ++ // store exception oop and throwing pc to JavaThread ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ bind(L); ++ } ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ Label cont; ++ ++ __ beqz(x10, cont); // have we deoptimized? ++ ++ // Will reexecute. Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob ++ restore_live_registers(sasm); ++ __ leave(); ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ ++ __ bind(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); ++ ++ return oop_maps; ++} ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ // for better readability ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ ret(); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { ++ StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = x13; // Incoming ++ Register obj = x10; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If 
TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = x12; ++ Register tmp1 = x9; ++ Register tmp2 = x14; ++ assert_different_registers(klass, obj, obj_size, tmp1, tmp2); ++ ++ const int sp_offset = 2; ++ const int x9_offset = 1; ++ const int zr_offset = 0; ++ __ addi(sp, sp, -(sp_offset * wordSize)); ++ __ sd(x9, Address(sp, x9_offset * wordSize)); ++ __ sd(zr, Address(sp, zr_offset * wordSize)); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ lbu(t0, Address(klass, InstanceKlass::init_state_offset())); ++ __ mv(t1, InstanceKlass::fully_initialized); ++ __ bne(t0, t1, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ lw(obj_size, Address(klass, Klass::layout_helper_offset())); ++ // make sure it's an instance. For instances, layout helper is a positive number. ++ // For arrays, layout helper is a negative number ++ __ blez(obj_size, not_ok); ++ __ andi(t0, obj_size, Klass::_lh_instance_slow_path_bit); ++ __ beqz(t0, ok); ++ __ bind(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size ++ __ lwu(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, tmp1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, tmp1, tmp2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ld(x9, Address(sp, x9_offset * wordSize)); ++ __ ld(zr, Address(sp, zr_offset * wordSize)); ++ __ addi(sp, sp, sp_offset * wordSize); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ ld(x9, Address(sp, x9_offset * wordSize)); ++ __ ld(zr, Address(sp, zr_offset * wordSize)); ++ __ addi(sp, sp, sp_offset * wordSize); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ assert_cond(map != NULL); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r10(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ ret(); ++ ++ // x10: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = x10; ++ Register method = x11; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ assert_cond(map != NULL); ++ ++ const int bci_off = 0; ++ const int method_off = 1; ++ // Retrieve bci ++ __ lw(bci, Address(fp, bci_off * BytesPerWord)); ++ // And a pointer to the Method* ++ __ ld(method, Address(fp, method_off * BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = x9; // Incoming ++ Register klass = x13; // Incoming ++ Register obj = x10; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is 
really an array of the proper kind ++ { ++ Label ok; ++ Register tmp = obj; ++ __ lwu(tmp, Address(klass, Klass::layout_helper_offset())); ++ __ sraiw(tmp, tmp, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value : Klass::_lh_array_tag_obj_value); ++ __ mv(t0, tag); ++ __ beq(t0, tmp, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ bind(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. ++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = x14; ++ Register tmp1 = x12; ++ Register tmp2 = x15; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, tmp1, tmp2); ++ ++ // check that array length is small enough for fast path. ++ __ mv(t0, C1_MacroAssembler::max_array_allocation_length); ++ __ bgtu(length, t0, slow_path); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ __ lwu(tmp1, Address(klass, Klass::layout_helper_offset())); ++ __ andi(t0, tmp1, 0x1f); ++ __ sll(arr_size, length, t0); ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(tmp1, tmp1, XLEN - lh_header_size_msb); ++ __ srli(tmp1, tmp1, XLEN - lh_header_size_width); ++ __ add(arr_size, arr_size, tmp1); ++ __ addi(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask); ++ ++ __ eden_allocate(obj, arr_size, 0, tmp1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, tmp1, tmp2); ++ __ lbu(tmp1, Address(klass, ++ in_bytes(Klass::layout_helper_offset()) + ++ (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andi(tmp1, tmp1, Klass::_lh_header_size_mask); ++ __ sub(arr_size, arr_size, tmp1); // body length ++ __ add(tmp1, tmp1, obj); // body start ++ __ initialize_body(tmp1, arr_size, 0, tmp2); ++ __ membar(MacroAssembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ ret(); ++ ++ __ bind(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ assert_cond(map != NULL); ++ int call_offset = 0; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r10(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ ret(); ++ ++ // x10: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { ++ StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // x10: klass ++ // x9: rank ++ // x12: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ assert_cond(map != NULL); ++ __ mv(c_rarg1, x10); ++ __ mv(c_rarg3, x12); ++ __ mv(c_rarg2, x9); ++ int call_offset = __ call_RT(x10, noreg, CAST_FROM_FN_PTR(address, new_multi_array), x11, x12, x13); ++ ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r10(sasm); ++ ++ // x10: new multi 
array ++ __ verify_oop(x10); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ __ verify_oop(c_rarg0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = x15; ++ __ load_klass(t, x10); ++ __ lwu(t, Address(t, Klass::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_HAS_FINALIZER); ++ __ bnez(t0, register_finalizer); ++ __ ret(); ++ ++ __ bind(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ assert_cond(oop_map != NULL); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), x10); ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ ret(); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { ++ StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { ++ StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, ++ CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // push klass_RInfo (object klass or other subclass) ++ // push sup_k_RInfo (array element klass or other superclass) ++ // jump to slow_subtype_check ++ // Note that the subclass is pushed first, and is therefore deepest. 
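The new_type_array / new_object_array fast path a little further up computes the allocation size as round_up(header_size + (length << log2(element_size))) straight from the Klass layout_helper: the low bits hold log2 of the element size and a byte field holds the header size in bytes, and the sum is rounded up to the object alignment. A compact sketch of the same arithmetic in plain C++ follows; the function name and the concrete header/alignment values in main are assumptions chosen for the example, not HotSpot's exact encoding.

    #include <cassert>
    #include <cstdint>

    // round_up(header_size + (length << log2_element_size)) to the object
    // alignment, mirroring the sll/add/addi/andi sequence in the fast path.
    static uint64_t array_alloc_size(uint64_t length,
                                     unsigned log2_element_size,  // low bits of layout_helper
                                     uint64_t header_size_bytes,  // header-size byte of layout_helper
                                     uint64_t alignment)          // MinObjAlignmentInBytes, power of two
    {
      uint64_t size = (length << log2_element_size) + header_size_bytes;
      uint64_t mask = alignment - 1;   // MinObjAlignmentInBytesMask
      return (size + mask) & ~mask;    // align up
    }

    int main() {
      // e.g. an int[10]: 4-byte elements (log2 == 2), 16-byte header, 8-byte alignment
      assert(array_alloc_size(10, 2, 16, 8) == 56);  // 16 + 40, already aligned
      // e.g. a byte[5]: 1-byte elements -> 21 bytes, rounded up to 24
      assert(array_alloc_size(5, 0, 16, 8) == 24);
      return 0;
    }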
++ enum layout { ++ x10_off, x10_off_hi, ++ x12_off, x12_off_hi, ++ x14_off, x14_off_hi, ++ x15_off, x15_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ push_reg(RegSet::of(x10, x12, x14, x15), sp); ++ ++ __ ld(x14, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // sub klass ++ __ ld(x10, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // super klass ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(x14, x10, x12, x15, NULL, &miss); ++ ++ // fallthrough on success: ++ __ mv(t0, 1); ++ __ sd(t0, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result ++ __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); ++ __ ret(); ++ ++ __ bind(miss); ++ __ sd(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result ++ __ pop_reg(RegSet::of(x10, x12, x14, x15), sp); ++ __ ret(); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ assert_cond(map != NULL); ++ ++ // Called with store_parameter and not C abi ++ f.load_argument(1, x10); // x10: object ++ f.load_argument(0, x11); // x11: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), x10, x11); ++ ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ assert_cond(map != NULL); ++ ++ // Called with store_parameter and not C abi ++ f.load_argument(0, x10); // x10: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), x10); ++ ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ OopMap* oop_map = save_live_registers(sasm); ++ assert_cond(oop_map != NULL); ++ f.load_argument(0, c_rarg1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1); ++ ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { ++ StubFrame f(sasm, "range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { ++ __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a 
leaf VM function only. ++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { ++ StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { ++ StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { ++ StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { ++ StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { ++ StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { ++ StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { ++ StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { ++ StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ ++ OopMap* map = save_live_registers(sasm); ++ assert_cond(map != NULL); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ } ++ break; ++ ++ case dtrace_object_alloc_id: ++ { // c_rarg0: object ++ StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); ++ save_live_registers(sasm); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), c_rarg0); ++ ++ restore_live_registers(sasm); ++ } ++ break; ++ ++ default: ++ { ++ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); ++ __ mv(x10, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), x10); ++ __ should_not_reach_here(); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } +diff --git a/src/hotspot/cpu/riscv/c1_globals_riscv.hpp b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +new file mode 100644 +index 000000000..974c8fe76 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c1_globals_riscv.hpp +@@ -0,0 +1,72 @@ ++/* ++ 
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C1_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C1_GLOBALS_RISCV_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. ++// (see c1_globals.hpp) ++ ++#ifndef TIERED ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !TIERED ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_RISCV_C1_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_globals_riscv.hpp b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +new file mode 100644 +index 000000000..bf4efa629 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_globals_riscv.hpp +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2000, 2017, Oracle and/or its 
affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_C2_GLOBALS_RISCV_HPP ++#define CPU_RISCV_C2_GLOBALS_RISCV_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. ++ ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(intx, CompileThreshold, 10000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 0); ++define_pd_global(intx, FLOATPRESSURE, 64); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 24); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. 
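The comment above requires InitialCodeCacheSize to be an integral multiple of CodeCacheExpansionSize, and with the defaults chosen here it is (2496*K / 64*K == 39). A one-line check of that arithmetic follows, with the two constants copied from the flags above; the surrounding program is only a sketch for verifying the relationship, not part of the patch.

    #include <cstdio>

    int main() {
      constexpr long K = 1024;
      constexpr long InitialCodeCacheSize   = 2496 * K;  // value from the flag above
      constexpr long CodeCacheExpansionSize = 64 * K;    // value from the flag above
      static_assert(InitialCodeCacheSize % CodeCacheExpansionSize == 0,
                    "InitialCodeCacheSize must be an integral multiple of CodeCacheExpansionSize");
      std::printf("%ld expansion steps fill the initial code cache\n",
                  InitialCodeCacheSize / CodeCacheExpansionSize);  // prints 39
      return 0;
    }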
++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, true); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 48*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++ ++#endif // CPU_RISCV_C2_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/c2_init_riscv.cpp b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +new file mode 100644 +index 000000000..3cb4a4995 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/c2_init_riscv.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for riscv ++ ++extern void reg_mask_init(); ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); ++} +diff --git a/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +new file mode 100644 +index 000000000..881900892 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/codeBuffer_riscv.hpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_CODEBUFFER_RISCV_HPP ++#define CPU_RISCV_CODEBUFFER_RISCV_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/compiledIC_riscv.cpp b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +new file mode 100644 +index 000000000..0354a93a0 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/compiledIC_riscv.cpp +@@ -0,0 +1,154 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ precond(cbuf.stubs()->start() != badAddress); ++ precond(cbuf.stubs()->end() != badAddress); ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // mv xmethod, 0 ++ // jalr -4 # to self ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. 
++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(to_interp_stub_size()); ++ int offset = __ offset(); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ // static stub relocation stores the instruction address of the call ++ __ relocate(static_stub_Relocation::spec(mark)); ++ ++ __ emit_static_call_stub(); ++ ++ assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ // (lui, addi, slli, addi, slli, addi) + (lui, addi, slli, addi, slli) + jalr ++ return 12 * NativeInstruction::instruction_size; ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ // Somewhat pessimistically, we count four instructions here (although ++ // there are only three) because we sometimes emit an alignment nop. ++ // Trampoline stubs are always word aligned. ++ return NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 4; // 3 in emit_to_interp_stub + 1 in emit_call ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub); ++#ifndef PRODUCT ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(data == 0 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++#endif ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry); ++ ICache::invalidate_range(stub, to_interp_stub_size()); ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub); ++ method_holder->set_data(0); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder ++ = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ ++ // Verify state. 
++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/copy_riscv.hpp b/src/hotspot/cpu/riscv/copy_riscv.hpp +new file mode 100644 +index 000000000..011e965ad +--- /dev/null ++++ b/src/hotspot/cpu/riscv/copy_riscv.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_COPY_RISCV_HPP ++#define CPU_RISCV_COPY_RISCV_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif // CPU_RISCV_COPY_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/depChecker_riscv.hpp b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +new file mode 100644 +index 000000000..31cee7103 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/depChecker_riscv.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++#define CPU_RISCV_VM_DEPCHECKER_RISCV_HPP ++ ++// Nothing to do on riscv ++ ++#endif // CPU_RISCV_VM_DEPCHECKER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/disassembler_riscv.hpp b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +new file mode 100644 +index 000000000..e97b89327 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/disassembler_riscv.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_DISASSEMBLER_RISCV_HPP ++#define CPU_RISCV_DISASSEMBLER_RISCV_HPP ++ ++ static int pd_instruction_alignment() { ++ return 1; ++ } ++ ++ static const char* pd_cpu_opts() { ++ return ""; ++ } ++ ++#endif // CPU_RISCV_DISASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.cpp b/src/hotspot/cpu/riscv/frame_riscv.cpp +new file mode 100644 +index 000000000..be6f1a67f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.cpp +@@ -0,0 +1,683 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code).
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" ++#endif ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address addr_sp = (address)_sp; ++ address addr_fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; ++ assert_cond(thread != NULL); ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (addr_sp < thread->stack_base()) && ++ (addr_sp >= thread->stack_base() - usable_stack_size); ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. ++ ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. ++ ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (addr_fp < thread->stack_base() && (addr_fp > addr_sp) && ++ (((addr_fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. 
This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address)this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ sender_pc = (address) *(sender_sp + frame::return_addr_offset); ++ saved_fp = (intptr_t*) *(sender_sp + frame::link_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // fp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp ++ // is really a frame pointer. 
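++ // The saved fp is trusted only when it lies strictly between the candidate sender_sp and the thread's stack base.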
++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ if ((address)this->fp()[return_addr_offset] == NULL) { return false; } ++ ++ return true; ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. 
++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* last_sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = last_sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. ++ jfa->capture_last_Java_pc(); ++ } ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++} ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. 
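++// Debug-only helper; called from adjust_unextended_sp() below.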
++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ assert_cond(nm != NULL); ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On riscv, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ assert(map != NULL, "map must be set"); ++ map->set_location(::fp->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ map->set_location(::fp->as_VMReg()->next(), (address) link_addr); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++#ifdef COMPILER2 ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2 ++ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ // we cannot rely upon the last fp having been saved to the thread ++ // in C2 code but it will have been pushed onto the stack. 
so we ++ // have to find it relative to the unextended sp ++ ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = l_sender_sp; ++ ++ // the return_address is always the word on the stack ++ address sender_pc = (address) *(l_sender_sp + frame::return_addr_offset); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp + frame::link_offset); ++ ++ assert(map != NULL, "map must be set"); ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no ++ // oopmap for it so we must fill in its location as if there was ++ // an oopmap entry since if our caller was compiled code there ++ // could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ ++ return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++//------------------------------------------------------------------------------ ++// frame::sender ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ assert(map != NULL, "map must be set"); ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) { ++ return sender_for_entry_frame(map); ++ } ++ if (is_interpreted_frame()) { ++ return sender_for_interpreter_frame(map); ++ } ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ // This test looks odd: why is it not is_compiled_frame() ? That's ++ // because stubs also have OOP maps. ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == NULL || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == NULL || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) { ++ return false; ++ } ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use of unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } ++ ++ // validate bci/bcx ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate constantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) { ++ return false; ++ } ++ // validate locals ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) { ++ return false; ++ } ++ // We'd have to be pretty unlucky to be mislead at this point ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr = NULL; ++ if (method->is_native()) { ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ // This is because we do a push(ltos) after push(dtos) in generate_native_entry. ++ tos_addr += 2 * Interpreter::stackElementWords; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : { ++ value_result->f = *(jfloat*)tos_addr; ++ break; ++ } ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // Not used on riscv, but we must return something. ++ return NULL; ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(!is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#undef DESCRIBE_FP_OFFSET ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* ptr_sp, void* ptr_fp, void* pc) { ++ init((intptr_t*)ptr_sp, (intptr_t*)ptr_fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif ++ ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ // last frame set? ++ if (last_Java_sp() == NULL) { return; } ++ // already walkable? ++ if (walkable()) { return; } ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); ++} ++ ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; ++} +diff --git a/src/hotspot/cpu/riscv/frame_riscv.hpp b/src/hotspot/cpu/riscv/frame_riscv.hpp +new file mode 100644 +index 000000000..7acabcbba +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.hpp +@@ -0,0 +1,200 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_FRAME_RISCV_HPP ++#define CPU_RISCV_FRAME_RISCV_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++ ++// [monitors[0] ] \ ++// ... | monitor block size = k ++// [monitors[k-1] ] / ++// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset ++// [byte code index/pointr] = bcx() bcx_offset ++ ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++ ++// [klass of method ] = mirror() mirror_offset ++// [padding ] ++ ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++ ++// [last esp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++ ++// [old frame pointer ] ++// [return pc ] ++ ++// [last sp ] <- fp = link() ++// [oop temp ] (only for native calls) ++ ++// [padding ] (to preserve machine SP alignment) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C Frame ------------------------------------------------ ++// Stack: gcc with -fno-omit-frame-pointer ++// . ++// . ++// +-> . ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// +-> | ... | | ++// | +-----------------+ | ++// | | return address | | ++// | | previous fp ------+ ++// | | saved registers | ++// | | local variables | ++// | | ... | <-+ ++// | +-----------------+ | ++// | | return address | | ++// +------ previous fp | | ++// | saved registers | | ++// | local variables | | ++// $fp --> | ... 
| | ++// +-----------------+ | ++// | return address | | ++// | previous fp ------+ ++// | saved registers | ++// $sp --> | local variables | ++// +-----------------+ ++// ------------------------------ C Frame ------------------------------------------------ ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = -2, ++ return_addr_offset = -1, ++ sender_sp_offset = 0, ++ // Interpreter frames ++ interpreter_frame_oop_temp_offset = 1, // for native calls only ++ ++ interpreter_frame_sender_sp_offset = -3, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ // n.b. these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 34, ++ entry_frame_call_wrapper_offset = -10, ++ ++ // we don't need a save area ++ arg_reg_save_area_bytes = 0 ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
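++ // _unextended_sp records that pre-extension sp; sender construction and oop map lookups use it, while sp() keeps the raw value.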
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++ ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ ++ frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc); ++ ++ frame(intptr_t* ptr_sp, intptr_t* ptr_fp); ++ ++ void init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc); ++ ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* ptr_sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_RISCV_FRAME_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/frame_riscv.inline.hpp b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +new file mode 100644 +index 000000000..5bc6b430c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/frame_riscv.inline.hpp +@@ -0,0 +1,257 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_FRAME_RISCV_INLINE_HPP ++#define CPU_RISCV_FRAME_RISCV_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for RISCV frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++static int spin; ++ ++inline void frame::init(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp, address pc) { ++ init(ptr_sp, ptr_fp, pc); ++} ++ ++inline frame::frame(intptr_t* ptr_sp, intptr_t* unextended_sp, intptr_t* ptr_fp, address pc) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = unextended_sp; ++ _fp = ptr_fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* ptr_sp, intptr_t* ptr_fp) { ++ intptr_t a = intptr_t(ptr_sp); ++ intptr_t b = intptr_t(ptr_fp); ++ _sp = ptr_sp; ++ _unextended_sp = ptr_sp; ++ _fp = ptr_fp; ++ _pc = (address)(ptr_sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() && ++ unextended_sp() == other.unextended_sp() && ++ fp() == other.fp() && ++ pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
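++// The stack grows toward lower addresses, so a numerically smaller id (unextended sp) denotes a younger frame; is_younger()/is_older() rely on that ordering.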
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++ ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address ++inline address* frame::sender_pc_addr() const { return (address*) addr_at(return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++ ++// Compiled frames ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ if(result_adr != NULL) { ++ return (*result_adr); ++ } else { ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ oop* result_adr = (oop *)map->location(x10->as_VMReg()); ++ if(result_adr != NULL) { ++ *result_adr = obj; ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#endif // CPU_RISCV_FRAME_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +new file mode 100644 +index 000000000..6f778956d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +@@ -0,0 +1,479 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if (!dest_uninitialized) { ++ Label done; ++ Address in_progress(xthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(t0, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(t0, in_progress); ++ } ++ __ beqz(t0, done); ++ ++ __ push_reg(saved_regs, sp); ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! ++ __ mv(t0, c_rarg0); ++ __ mv(c_rarg0, c_rarg1); ++ __ mv(c_rarg1, t0); ++ } else { ++ __ mv(c_rarg1, count); ++ __ mv(c_rarg0, addr); ++ } ++ } else { ++ __ mv(c_rarg0, addr); ++ __ mv(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ pop_reg(saved_regs, sp); ++ ++ __ bind(done); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) { ++ __ push_reg(saved_regs, sp); ++ assert_different_registers(start, count, tmp); ++ assert_different_registers(c_rarg0, count); ++ __ mv(c_rarg0, start); ++ __ mv(c_rarg1, count); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ pop_reg(saved_regs, sp); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == xthread, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? 
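++ // The flag is a 4-byte or 1-byte field depending on the build, so the load must match its width.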
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? ++ // If yes, goto runtime ++ ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) { saved += RegSet::of(x10); } ++ if (obj != noreg) { saved += RegSet::of(obj); } ++ ++ __ push_reg(saved, sp); ++ ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); ++ ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert(thread == xthread, "must be"); ++ assert_different_registers(store_addr, new_val, thread, tmp, tmp2, ++ t0); ++ assert(store_addr != noreg && new_val != noreg && tmp != noreg && ++ tmp2 != noreg, "expecting a register"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ ++ __ xorr(tmp, store_addr, new_val); ++ __ srli(tmp, tmp, HeapRegion::LogOfHRGrainBytes); ++ __ beqz(tmp, done); ++ ++ // crosses regions, storing NULL? ++ ++ __ beqz(new_val, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ const Register card_addr = tmp; ++ ++ __ srli(card_addr, store_addr, CardTable::card_shift); ++ ++ // get the address of the card ++ __ load_byte_map_base(tmp2); ++ __ add(card_addr, card_addr, tmp2); ++ __ lbu(tmp2, Address(card_addr)); ++ __ mv(t0, (int)G1CardTable::g1_young_card_val()); ++ __ beq(tmp2, t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ ++ __ lbu(tmp2, Address(card_addr)); ++ __ beqz(tmp2, done); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. 
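++ // dirty_card_val() is asserted to be 0 above, so storing zr marks the card dirty.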
++ ++ __ sb(zr, Address(card_addr)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ __ ld(tmp2, buffer); ++ __ add(t0, tmp2, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(store_addr, new_val); ++ __ push_reg(saved, sp); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ // RA is live. It must be saved around calls. ++ __ enter(); // barrier may call runtime ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ __ mv(tmp3, dst.base()); ++ } else { ++ __ la(tmp3, dst); ++ } ++ ++ g1_write_barrier_pre(masm, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg, noreg); ++ } else { ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ g1_write_barrier_post(masm, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
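++ // A null pre_val needs no SATB logging and branches straight back to the continuation; otherwise the value is handed to the C1 pre-barrier runtime blob.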
++  __ bind(*stub->entry());
++
++  assert(stub->pre_val()->is_register(), "Precondition.");
++
++  Register pre_val_reg = stub->pre_val()->as_register();
++
++  if (stub->do_load()) {
++    ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(),
++                false /* wide */, false /* unaligned */);
++  }
++  __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true);
++  ce->store_parameter(stub->pre_val()->as_register(), 0);
++  __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin()));
++  __ j(*stub->continuation());
++}
++
++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) {
++  G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1();
++  __ bind(*stub->entry());
++  assert(stub->addr()->is_register(), "Precondition");
++  assert(stub->new_val()->is_register(), "Precondition");
++  Register new_val_reg = stub->new_val()->as_register();
++  __ beqz(new_val_reg, *stub->continuation(), /* is_far */ true);
++  ce->store_parameter(stub->addr()->as_pointer_register(), 0);
++  __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin()));
++  __ j(*stub->continuation());
++}
++
++#undef __
++
++#define __ sasm->
++
++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) {
++  __ prologue("g1_pre_barrier", false);
++
++  BarrierSet* bs = BarrierSet::barrier_set();
++
++  // arg0 : previous value of memory
++  const Register pre_val = x10;
++  const Register thread = xthread;
++  const Register tmp = t0;
++
++  Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
++  Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
++  Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
++
++  Label done;
++  Label runtime;
++
++  // Is marking still active?
++  if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width
++    __ lwu(tmp, in_progress);
++  } else {
++    assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption");
++    __ lbu(tmp, in_progress);
++  }
++  __ beqz(tmp, done);
++
++  // Can we store original value in the thread's buffer?
++  __ ld(tmp, queue_index);
++  __ beqz(tmp, runtime);
++
++  __ sub(tmp, tmp, wordSize);
++  __ sd(tmp, queue_index);
++  __ ld(t1, buffer);
++  __ add(tmp, tmp, t1);
++  __ load_parameter(0, t1);
++  __ sd(t1, Address(tmp, 0));
++  __ j(done);
++
++  __ bind(runtime);
++  __ push_call_clobbered_registers();
++  __ load_parameter(0, pre_val);
++  __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
++  __ pop_call_clobbered_registers();
++  __ bind(done);
++
++  __ epilogue();
++}
++
++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) {
++  __ prologue("g1_post_barrier", false);
++
++  // arg0 : store_address
++  Address store_addr(fp, 2 * BytesPerWord); // 2 BytesPerWord from fp
++
++  BarrierSet* bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++
++  Label done;
++  Label runtime;
++
++  // At this point we know new_value is non-NULL and the new_value crosses regions.
++ // Must check to see if card is already dirty ++ const Register thread = xthread; ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ const Register card_offset = t1; ++ // RA is free here, so we can use it to hold the byte_map_base. ++ const Register byte_map_base = ra; ++ ++ assert_different_registers(card_offset, byte_map_base, t0); ++ ++ __ load_parameter(0, card_offset); ++ __ srli(card_offset, card_offset, CardTable::card_shift); ++ __ load_byte_map_base(byte_map_base); ++ ++ // Convert card offset into an address in card_addr ++ Register card_addr = card_offset; ++ __ add(card_addr, byte_map_base, card_addr); ++ ++ __ lbu(t0, Address(card_addr, 0)); ++ __ sub(t0, t0, (int)G1CardTable::g1_young_card_val()); ++ __ beqz(t0, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); ++ ++ __ membar(MacroAssembler::StoreLoad); ++ __ lbu(t0, Address(card_addr, 0)); ++ __ beqz(t0, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. ++ __ sb(zr, Address(card_addr, 0)); ++ ++ __ ld(t0, queue_index); ++ __ beqz(t0, runtime); ++ __ sub(t0, t0, wordSize); ++ __ sd(t0, queue_index); ++ ++ // Reuse RA to hold buffer_addr ++ const Register buffer_addr = ra; ++ ++ __ ld(buffer_addr, buffer); ++ __ add(t0, buffer_addr, t0); ++ __ sd(card_addr, Address(t0, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +new file mode 100644 +index 000000000..7f85e002d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +@@ -0,0 +1,78 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef COMPILER1 ++class LIR_Assembler; ++#endif ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_RISCV_GC_G1_G1BARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +new file mode 100644 +index 000000000..203b82744 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp +@@ -0,0 +1,226 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ // RA is live. It must be saved around calls. ++ ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else { ++ __ ld(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ load_unsigned_byte (dst, src); break; ++ case T_BYTE: __ load_signed_byte (dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld (dst, src); break; ++ case T_FLOAT: __ flw (f10, src); break; ++ case T_DOUBLE: __ fld (f10, src); break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ switch (type) { ++ case T_OBJECT: // fall through ++ case T_ARRAY: { ++ val = val == noreg ? zr : val; ++ if (in_heap) { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (val != zr) { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else { ++ __ sd(val, dst); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ sd(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: __ sb(val, dst); break; ++ case T_CHAR: __ sh(val, dst); break; ++ case T_SHORT: __ sh(val, dst); break; ++ case T_INT: __ sw(val, dst); break; ++ case T_LONG: __ sd(val, dst); break; ++ case T_ADDRESS: __ sd(val, dst); break; ++ case T_FLOAT: __ fsw(f10, dst); break; ++ case T_DOUBLE: __ fsd(f10, dst); break; ++ default: Unimplemented(); ++ } ++ ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far) { ++ __ beq(obj1, obj2, equal, is_far); ++} ++ ++void BarrierSetAssembler::obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far) { ++ __ bne(obj1, obj2, nequal, is_far); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ // If mask changes we need to ensure that the inverse is still encodable as an immediate ++ STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); ++ __ andi(obj, obj, ~JNIHandles::weak_tag_mask); ++ __ ld(obj, Address(obj, 0)); // *obj ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. 
++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ assert_different_registers(obj, tmp2); ++ assert_different_registers(obj, var_size_in_bytes); ++ Register end = tmp2; ++ ++ __ ld(obj, Address(xthread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); ++ } ++ __ ld(t0, Address(xthread, JavaThread::tlab_end_offset())); ++ __ bgtu(end, t0, slow_case, is_far); ++ ++ // update the tlab top pointer ++ __ sd(end, Address(xthread, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ sub(var_size_in_bytes, var_size_in_bytes, obj); ++ } ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ assert_different_registers(obj, var_size_in_bytes, tmp1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ j(slow_case); ++ } else { ++ Register end = tmp1; ++ Label retry; ++ __ bind(retry); ++ ++ // Get the current end of the heap ++ ExternalAddress address_end((address) Universe::heap()->end_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t1, address_end, offset); ++ __ ld(t1, Address(t1, offset)); ++ } ++ ++ // Get the current top of the heap ++ ExternalAddress address_top((address) Universe::heap()->top_addr()); ++ { ++ int32_t offset; ++ __ la_patchable(t0, address_top, offset); ++ __ addi(t0, t0, offset); ++ __ lr_d(obj, t0, Assembler::aqrl); ++ } ++ ++ // Adjust it my the size of our new object ++ if (var_size_in_bytes == noreg) { ++ __ la(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ add(end, obj, var_size_in_bytes); ++ } ++ ++ // if end < obj then we wrapped around high memory ++ __ bltu(end, obj, slow_case, is_far); ++ ++ __ bgtu(end, t1, slow_case, is_far); ++ ++ // If heap_top hasn't been changed by some other thread, update it. ++ __ sc_d(t1, end, t0, Assembler::rl); ++ __ bnez(t1, retry); ++ ++ incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, tmp1); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1) { ++ assert(tmp1->is_valid(), "need temp reg"); ++ ++ __ ld(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ add(tmp1, tmp1, var_size_in_bytes); ++ } else { ++ __ add(tmp1, tmp1, con_size_in_bytes); ++ } ++ __ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset()))); ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +new file mode 100644 +index 000000000..964fc28be +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
++#define CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
++
++#include "asm/macroAssembler.hpp"
++#include "memory/allocation.hpp"
++#include "oops/access.hpp"
++
++class BarrierSetAssembler: public CHeapObj<mtGC> {
++private:
++  void incr_allocated_bytes(MacroAssembler* masm,
++                            Register var_size_in_bytes, int con_size_in_bytes,
++                            Register t1 = noreg);
++
++public:
++  virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register src, Register dst, Register count, RegSet saved_regs) {}
++  virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
++                                  Register start, Register end, Register tmp, RegSet saved_regs) {}
++  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                       Register dst, Address src, Register tmp1, Register tmp_thread);
++  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
++                        Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
++  virtual void obj_equals(MacroAssembler* masm, Register obj1, Register obj2, Label& equal, bool is_far = false);
++  virtual void obj_nequals(MacroAssembler* masm, Register obj1, Register obj2, Label& nequal, bool is_far = false);
++  virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
++                                             Register obj, Register tmp, Label& slowpath);
++
++  virtual void tlab_allocate(MacroAssembler* masm,
++                             Register obj,              // result: pointer to object after successful allocation
++                             Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++                             int con_size_in_bytes,     // object size in bytes if known at compile time
++                             Register tmp1,             // temp register
++                             Register tmp2,             // temp register
++                             Label& slow_case,          // continuation point if fast allocation fails
++                             bool is_far = false        // the distance of label slowcase could be more than 12KiB in C1
++  );
++
++  void eden_allocate(MacroAssembler* masm,
++                     Register obj,               // result: pointer to object after successful allocation
++                     Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
++                     int con_size_in_bytes,      // object size in bytes if known at compile time
++                     Register tmp1,              // temp register
++                     Label& slow_case,           // continuation point if fast allocation fails
++                     bool is_far = false         // the distance of label slowcase could be more than 12KiB in C1
++  );
++  virtual void barrier_stubs_init() {}
++  virtual ~BarrierSetAssembler() {}
++};
++
++#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP
+diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
+new file mode 100644
+index 000000000..1720488fb
+--- /dev/null
++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.cpp
+@@ -0,0 +1,120 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/cardTable.hpp"
++#include "gc/shared/cardTableBarrierSet.hpp"
++#include "gc/shared/cardTableBarrierSetAssembler.hpp"
++#include "interpreter/interp_masm.hpp"
++
++#define __ masm->
++
++
++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Register tmp) {
++  assert_different_registers(obj, tmp);
++  BarrierSet* bs = BarrierSet::barrier_set();
++  assert(bs->kind() == BarrierSet::CardTableBarrierSet, "Wrong barrier set kind");
++
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
++
++  __ srli(obj, obj, CardTable::card_shift);
++
++  assert(CardTable::dirty_card_val() == 0, "must be");
++
++  __ load_byte_map_base(tmp);
++  __ add(tmp, obj, tmp);
++
++  if (UseCondCardMark) {
++    Label L_already_dirty;
++    __ membar(MacroAssembler::StoreLoad);
++    __ lbu(t1, Address(tmp));
++    __ beqz(t1, L_already_dirty);
++    __ sb(zr, Address(tmp));
++    __ bind(L_already_dirty);
++  } else {
++    if (ct->scanned_concurrently()) {
++      __ membar(MacroAssembler::StoreStore);
++    }
++    __ sb(zr, Address(tmp));
++  }
++}
++
++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
++                                                                    Register start, Register count, Register tmp, RegSet saved_regs) {
++  assert_different_registers(start, tmp);
++  assert_different_registers(count, tmp);
++  BarrierSet* bs = BarrierSet::barrier_set();
++  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
++  CardTable* ct = ctbs->card_table();
++
++  Label L_loop, L_done;
++  const Register end = count;
++
++  __ beqz(count, L_done); // zero count - nothing to do
++  // end = start + count << LogBytesPerHeapOop
++  __ shadd(end, count, start, count, LogBytesPerHeapOop);
++  __ sub(end, end, BytesPerHeapOop); // last element address to make
inclusive ++ ++ __ srli(start, start, CardTable::card_shift); ++ __ srli(end, end, CardTable::card_shift); ++ __ sub(count, end, start); // number of bytes to copy ++ ++ __ load_byte_map_base(tmp); ++ __ add(start, start, tmp); ++ if (ct->scanned_concurrently()) { ++ __ membar(MacroAssembler::StoreStore); ++ } ++ ++ __ bind(L_loop); ++ __ add(tmp, start, count); ++ __ sb(zr, Address(tmp)); ++ __ sub(count, count, 1); ++ __ bgez(count, L_loop); ++ __ bind(L_done); ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || dst.offset() == 0) { ++ store_check(masm, dst.base(), tmp3); ++ } else { ++ __ la(tmp3, dst); ++ store_check(masm, tmp3, t0); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 000000000..a5b3f9fe8 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/cardTableBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Register tmp); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ ++}; ++ ++#endif // #ifndef CPU_RISCV_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 000000000..b82275297 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, ++ RegSet saved_regs) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, start, count, tmp, saved_regs); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 000000000..df206cc87 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shared/modRefBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, RegSet saved_regs) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp, RegSet saved_regs) {} ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) = 0; ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register start, Register count, Register tmp, RegSet saved_regs); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++}; ++ ++#endif // CPU_RISCV_GC_SHARED_MODREFBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +new file mode 100644 +index 000000000..6657f1be0 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/c1/shenandoahBarrierSetC1_riscv.cpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++ ++#define __ masm->masm()-> ++ ++void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { ++ Register addr = _addr->as_register_lo(); ++ Register newval = _new_value->as_register(); ++ Register cmpval = _cmp_value->as_register(); ++ Register tmp1 = _tmp1->as_register(); ++ Register tmp2 = _tmp2->as_register(); ++ Register result = result_opr()->as_register(); ++ ++ ShenandoahBarrierSet::assembler()->iu_barrier(masm->masm(), newval, t1); ++ ++ if (UseCompressedOops) { ++ __ encode_heap_oop(tmp1, cmpval); ++ cmpval = tmp1; ++ __ encode_heap_oop(tmp2, newval); ++ newval = tmp2; ++ } ++ ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /* acquire */ Assembler::aq, ++ /* release */ Assembler::rl, /* is_cae */ false, result); ++ if (UseBarriersForVolatile) { ++ // The membar here is necessary to prevent reordering between the ++ // release store in the CAS above and a subsequent volatile load. ++ // However for !UseBarriersForVolatile, C1 inserts a full barrier before ++ // volatile loads which means we don't need an additional barrier ++ // here (see LIRGenerator::volatile_field_load()). ++ __ membar(MacroAssembler::AnyAny); ++ } ++} ++ ++#undef __ ++ ++#ifdef ASSERT ++#define __ gen->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen->lir()-> ++#endif ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_cmpxchg_at_resolved(LIRAccess& access, LIRItem& cmp_value, LIRItem& new_value) { ++ BasicType bt = access.type(); ++ if (access.is_oop()) { ++ LIRGenerator *gen = access.gen(); ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(gen, access.access_emit_info(), access.decorators(), access.resolved_addr(), ++ LIR_OprFact::illegalOpr /* pre_val */); ++ } ++ if (ShenandoahCASBarrier) { ++ cmp_value.load_item(); ++ new_value.load_item(); ++ ++ LIR_Opr tmp1 = gen->new_register(T_OBJECT); ++ LIR_Opr tmp2 = gen->new_register(T_OBJECT); ++ LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); ++ LIR_Opr result = gen->new_register(T_INT); ++ ++ __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), tmp1, tmp2, result)); ++ return result; ++ } ++ } ++ return BarrierSetC1::atomic_cmpxchg_at_resolved(access, cmp_value, new_value); ++} ++ ++LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRItem& value) { ++ LIRGenerator* gen = access.gen(); ++ BasicType type = access.type(); ++ ++ LIR_Opr result = gen->new_register(type); ++ value.load_item(); ++ LIR_Opr value_opr = value.result(); ++ ++ if (access.is_oop()) { ++ value_opr = iu_barrier(access.gen(), value_opr, access.access_emit_info(), access.decorators()); ++ } ++ ++ assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = gen->new_register(T_INT); ++ __ xchg(access.resolved_addr(), value_opr, result, tmp); ++ ++ if (access.is_oop()) { ++ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0)); ++ LIR_Opr tmp_opr = gen->new_register(type); ++ __ move(result, tmp_opr); ++ result = tmp_opr; ++ if (ShenandoahSATBBarrier) { ++ pre_barrier(access.gen(), access.access_emit_info(), access.decorators(), LIR_OprFact::illegalOpr, ++ result /* pre_val */); ++ } ++ } ++ ++ return result; 
++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +new file mode 100644 +index 000000000..1bc01e454 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp +@@ -0,0 +1,743 @@ ++/* ++ * Copyright (c) 2018, 2020, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#include "gc/shenandoah/shenandoahForwarding.hpp" ++#include "gc/shenandoah/shenandoahHeap.hpp" ++#include "gc/shenandoah/shenandoahHeapRegion.hpp" ++#include "gc/shenandoah/shenandoahRuntime.hpp" ++#include "gc/shenandoah/shenandoahThreadLocalData.hpp" ++#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/shenandoah/c1/shenandoahBarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; ++ ++void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs) { ++ if (is_oop) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ if ((ShenandoahSATBBarrier && !dest_uninitialized) || ++ ShenandoahIUBarrier || ShenandoahLoadRefBarrier) { ++ Label done; ++ ++ // Avoid calling runtime if count == 0 ++ __ beqz(count, done); ++ ++ // Is GC active? 
++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ assert_different_registers(src, dst, count, t0); ++ ++ __ lbu(t0, gc_state); ++ if (ShenandoahSATBBarrier && dest_uninitialized) { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t0, done); ++ } else { ++ __ andi(t0, t0, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::MARKING); ++ __ beqz(t0, done); ++ } ++ ++ __ push_reg(saved_regs, sp); ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop_entry), ++ src, dst, count); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop_entry), src, dst, count); ++ } ++ __ pop_reg(saved_regs, sp); ++ __ bind(done); ++ } ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ if (ShenandoahSATBBarrier) { ++ satb_write_barrier_pre(masm, obj, pre_val, thread, tmp, tosca_live, expand_call); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ assert(thread == xthread, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert_different_registers(obj, pre_val, tmp, t0); ++ assert(pre_val != noreg && tmp != noreg, "expecting a register"); ++ ++ Address in_progress(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lwu(tmp, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lbu(tmp, in_progress); ++ } ++ __ beqz(tmp, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ beqz(pre_val, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ __ ld(tmp, index); // tmp := *index_adr ++ __ beqz(tmp, runtime); // tmp == 0? If yes, goto runtime ++ ++ __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize ++ __ sd(tmp, index); // *index_adr := tmp ++ __ ld(t0, buffer); ++ __ add(tmp, tmp, t0); // tmp := tmp + *buffer_adr ++ ++ // Record the previous value ++ __ sd(pre_val, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ RegSet saved = RegSet::of(pre_val); ++ if (tosca_live) saved += RegSet::of(x10); ++ if (obj != noreg) saved += RegSet::of(obj); ++ ++ __ push_reg(saved, sp); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. 
in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ __ pop_reg(saved, sp); ++ ++ __ bind(done); ++} ++ ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ ++ Label is_null; ++ __ beqz(dst, is_null); ++ resolve_forward_pointer_not_null(masm, dst, tmp); ++ __ bind(is_null); ++} ++ ++// IMPORTANT: This must preserve all registers, even t0 and t1, except those explicitely ++// passed in. ++void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { ++ assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); ++ // The below loads the mark word, checks if the lowest two bits are ++ // set, and if so, clear the lowest two bits and copy the result ++ // to dst. Otherwise it leaves dst alone. ++ // Implementing this is surprisingly awkward. I do it here by: ++ // - Inverting the mark word ++ // - Test lowest two bits == 0 ++ // - If so, set the lowest two bits ++ // - Invert the result back, and copy to dst ++ RegSet savedRegs = RegSet::of(t2); ++ bool borrow_reg = (tmp == noreg); ++ if (borrow_reg) { ++ // No free registers available. Make one useful. 
++ tmp = t0; ++ if (tmp == dst) { ++ tmp = t1; ++ } ++ savedRegs += RegSet::of(tmp); ++ } ++ ++ assert_different_registers(tmp, dst, t2); ++ __ push_reg(savedRegs, sp); ++ ++ Label done; ++ __ ld(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ andi(t2, tmp, markOopDesc::lock_mask_in_place); ++ __ bnez(t2, done); ++ __ ori(tmp, tmp, markOopDesc::marked_value); ++ __ xori(dst, tmp, -1); // eon with 0 is equivalent to XOR with -1 ++ __ bind(done); ++ ++ __ pop_reg(savedRegs, sp); ++} ++ ++void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, ++ Register dst, Address load_addr) { ++ assert(ShenandoahLoadRefBarrier, "Should be enabled"); ++ assert(dst != t1 && load_addr.base() != t1, "need t1"); ++ assert_different_registers(load_addr.base(), t1, t2); ++ ++ Label done; ++ __ enter(); ++ Address gc_state(xthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lbu(t1, gc_state); ++ ++ // Check for heap stability ++ __ andi(t1, t1, ShenandoahHeap::HAS_FORWARDED); ++ __ beqz(t1, done); ++ ++ // use x11 for load address ++ Register result_dst = dst; ++ if (dst == x11) { ++ __ mv(t1, dst); ++ dst = t1; ++ } ++ ++ // Save x10 and x11, unless it is an output register ++ RegSet to_save = RegSet::of(x10, x11) - result_dst; ++ __ push_reg(to_save, sp); ++ __ la(x11, load_addr); ++ __ mv(x10, dst); ++ ++ __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); ++ ++ __ mv(result_dst, x10); ++ __ pop_reg(to_save, sp); ++ ++ __ bind(done); ++ __ leave(); ++} ++ ++void ShenandoahBarrierSetAssembler::iu_barrier(MacroAssembler* masm, Register dst, Register tmp) { ++ if (ShenandoahIUBarrier) { ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm, noreg, dst, xthread, tmp, true, false); ++ __ pop_call_clobbered_registers(); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { ++ if (ShenandoahLoadRefBarrier) { ++ Label is_null; ++ __ beqz(dst, is_null); ++ load_reference_barrier_not_null(masm, dst, load_addr); ++ __ bind(is_null); ++ } ++} ++ ++// ++// Arguments: ++// ++// Inputs: ++// src: oop location to load from, might be clobbered ++// ++// Output: ++// dst: oop loaded from src location ++// ++// Kill: ++// x30 (tmp reg) ++// ++// Alias: ++// dst: x30 (might use x30 as temporary output register to avoid clobbering src) ++// ++void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, ++ DecoratorSet decorators, ++ BasicType type, ++ Register dst, ++ Address src, ++ Register tmp1, ++ Register tmp_thread) { ++ // 1: non-reference load, no additional barrier is needed ++ if (!is_reference_type(type)) { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ return; ++ } ++ ++ // 2: load a reference from src location and apply LRB if needed ++ if (ShenandoahBarrierSet::need_load_reference_barrier(decorators, type)) { ++ guarantee(dst != x30 && src.base() != x30, "load_at need x30"); ++ bool ist5 = (dst == src.base()); ++ if (ist5) { ++ __ push_reg(RegSet::of(x30), sp); ++ } ++ Register result_dst = dst; ++ ++ // Preserve src location for LRB ++ if (dst == src.base()) { ++ dst = x30; ++ } ++ assert_different_registers(dst, src.base()); ++ ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ ++ load_reference_barrier(masm, dst, src); ++ ++ if (dst != result_dst) { ++ __ 
mv(result_dst, dst); ++ dst = result_dst; ++ } ++ ++ if (ist5) { ++ __ pop_reg(RegSet::of(x30), sp); ++ } ++ } else { ++ BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ } ++ ++ // 3: apply keep-alive barrier if needed ++ if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { ++ __ enter(); ++ __ push_call_clobbered_registers(); ++ satb_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ pop_call_clobbered_registers(); ++ __ leave(); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { ++ bool on_oop = is_reference_type(type); ++ if (!on_oop) { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); ++ return; ++ } ++ ++ // flatten object address if needed ++ if (dst.offset() == 0) { ++ if (dst.base() != tmp3) { ++ __ mv(tmp3, dst.base()); ++ } ++ } else { ++ __ la(tmp3, dst); ++ } ++ ++ shenandoah_write_barrier_pre(masm, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ xthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), noreg, noreg, noreg); ++ } else { ++ iu_barrier(masm, val, tmp1); ++ // G1 barrier needs uncompressed oop for region cross check. ++ Register new_val = val; ++ if (UseCompressedOops) { ++ new_val = t1; ++ __ mv(new_val, val); ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg, noreg); ++ } ++} ++ ++void ShenandoahBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ Label done; ++ // Resolve jobject ++ BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath); ++ ++ // Check for null. ++ __ beqz(obj, done); ++ ++ assert(obj != t1, "need t1"); ++ Address gc_state(jni_env, ShenandoahThreadLocalData::gc_state_offset() - JavaThread::jni_environment_offset()); ++ __ lbu(t1, gc_state); ++ ++ // Check for heap in evacuation phase ++ __ andi(t0, t1, ShenandoahHeap::EVACUATION); ++ __ bnez(t0, slowpath); ++ ++ __ bind(done); ++} ++ ++// Special Shenandoah CAS implementation that handles false negatives due ++// to concurrent evacuation. The service is more complex than a ++// traditional CAS operation because the CAS operation is intended to ++// succeed if the reference at addr exactly matches expected or if the ++// reference at addr holds a pointer to a from-space object that has ++// been relocated to the location named by expected. There are two ++// races that must be addressed: ++// a) A parallel thread may mutate the contents of addr so that it points ++// to a different object. In this case, the CAS operation should fail. ++// b) A parallel thread may heal the contents of addr, replacing a ++// from-space pointer held in addr with the to-space pointer ++// representing the new location of the object. ++// Upon entry to cmpxchg_oop, it is assured that new_val equals NULL ++// or it refers to an object that is not being evacuated out of ++// from-space, or it refers to the to-space version of an object that ++// is being evacuated out of from-space. 
++//
++// By default, this operation implements sequential consistency and the
++// value held in the result register following execution of the
++// generated code sequence is 0 to indicate failure of CAS, non-zero
++// to indicate success. Arguments support variations on this theme:
++//
++// acquire: Allow relaxation of the memory ordering on CAS from
++// sequential consistency. This can be useful when
++// sequential consistency is not required, such as when
++// another sequentially consistent operation is already
++// present in the execution stream. If acquire, successful
++// execution has the side effect of assuring that memory
++// values updated by other threads and "released" will be
++// visible to any read operations performed by this thread
++// which follow this operation in program order. This is a
++// special optimization that should not be enabled by default.
++// release: Allow relaxation of the memory ordering on CAS from
++// sequential consistency. This can be useful when
++// sequential consistency is not required, such as when
++// another sequentially consistent operation is already
++// present in the execution stream. If release, successful
++// completion of this operation has the side effect of
++// assuring that all writes to memory performed by this
++// thread that precede this operation in program order are
++// visible to all other threads that subsequently "acquire"
++// before reading the respective memory values. This is a
++// special optimization that should not be enabled by default.
++// is_cae: This turns CAS (compare and swap) into CAE (compare and
++// exchange). The HotSpot convention is that CAE makes
++// available to the caller the "failure witness", which is
++// the value that was stored in memory which did not match
++// the expected value. If is_cae, the result is the value
++// most recently fetched from addr rather than a boolean
++// success indicator.
++//
++// Clobbers t0, t1
++void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
++                                                Register addr,
++                                                Register expected,
++                                                Register new_val,
++                                                Assembler::Aqrl acquire,
++                                                Assembler::Aqrl release,
++                                                bool is_cae,
++                                                Register result) {
++  bool is_narrow = UseCompressedOops;
++  Assembler::operand_size size = is_narrow ? Assembler::uint32 : Assembler::int64;
++
++  assert_different_registers(addr, expected, t0, t1);
++  assert_different_registers(addr, new_val, t0, t1);
++
++  Label retry, success, fail, done;
++
++  __ bind(retry);
++
++  // Step 1: Try to CAS.
++  __ cmpxchg(addr, expected, new_val, size, acquire, release, /* result */ t1);
++
++  // If success, then we are done.
++  __ beq(expected, t1, success);
++
++  // Step 2: CAS failed, check the forwarded pointer.
++  __ mv(t0, t1);
++
++  if (is_narrow) {
++    __ decode_heap_oop(t0, t0);
++  }
++  resolve_forward_pointer(masm, t0);
++
++  __ encode_heap_oop(t0, t0);
++
++  // Report failure when the forwarded oop was not expected.
++  __ bne(t0, expected, fail);
++
++  // Step 3: CAS again using the forwarded oop.
++  __ cmpxchg(addr, t1, new_val, size, acquire, release, /* result */ t0);
++
++  // Retry when failed.
++ __ bne(t0, t1, retry); ++ ++ __ bind(success); ++ if (is_cae) { ++ __ mv(result, expected); ++ } else { ++ __ mv(result, 1); ++ } ++ __ j(done); ++ ++ __ bind(fail); ++ if (is_cae) { ++ __ mv(result, t0); ++ } else { ++ __ mv(result, zr); ++ } ++ ++ __ bind(done); ++} ++ ++#undef __ ++ ++#ifdef COMPILER1 ++ ++#define __ ce->masm()-> ++ ++void ShenandoahBarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. ++ __ bind(*stub->entry()); ++ ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), ++ stub->info(), false /* wide */, false /* unaligned */); ++ } ++ __ beqz(pre_val_reg, *stub->continuation(), /* is_far */ true); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ j(*stub->continuation()); ++} ++ ++void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assembler* ce, ++ ShenandoahLoadReferenceBarrierStub* stub) { ++ ShenandoahBarrierSetC1* bs = (ShenandoahBarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ __ bind(*stub->entry()); ++ ++ Register obj = stub->obj()->as_register(); ++ Register res = stub->result()->as_register(); ++ Register addr = stub->addr()->as_pointer_register(); ++ Register tmp1 = stub->tmp1()->as_register(); ++ Register tmp2 = stub->tmp2()->as_register(); ++ ++ assert(res == x10, "result must arrive in x10"); ++ assert_different_registers(tmp1, tmp2, t0); ++ ++ if (res != obj) { ++ __ mv(res, obj); ++ } ++ ++ // Check for null. ++ __ beqz(res, *stub->continuation(), /* is_far */ true); ++ ++ // Check for object in cset. ++ __ mv(tmp2, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(tmp1, res, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t0, tmp2, tmp1); ++ __ lb(tmp2, Address(t0)); ++ __ beqz(tmp2, *stub->continuation(), /* is_far */ true); ++ ++ // Check if object is already forwarded. ++ Label slow_path; ++ __ ld(tmp1, Address(res, oopDesc::mark_offset_in_bytes())); ++ __ xori(tmp1, tmp1, -1); ++ __ andi(t0, tmp1, markOopDesc::lock_mask_in_place); ++ __ bnez(t0, slow_path); ++ ++ // Decode forwarded object. 
++ __ ori(tmp1, tmp1, markOopDesc::marked_value); ++ __ xori(res, tmp1, -1); ++ __ j(*stub->continuation()); ++ ++ __ bind(slow_path); ++ ce->store_parameter(res, 0); ++ ce->store_parameter(addr, 1); ++ __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); ++ ++ __ j(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_pre_barrier", false); ++ ++ // arg0 : previous value of memory ++ ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ const Register pre_val = x10; ++ const Register thread = xthread; ++ const Register tmp = t0; ++ ++ Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); ++ __ lb(tmp, gc_state); ++ __ andi(tmp, tmp, ShenandoahHeap::MARKING); ++ __ beqz(tmp, done); ++ ++ // Can we store original value in the thread's buffer? ++ __ ld(tmp, queue_index); ++ __ beqz(tmp, runtime); ++ ++ __ sub(tmp, tmp, wordSize); ++ __ sd(tmp, queue_index); ++ __ ld(t1, buffer); ++ __ add(tmp, tmp, t1); ++ __ load_parameter(0, t1); ++ __ sd(t1, Address(tmp, 0)); ++ __ j(done); ++ ++ __ bind(runtime); ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, pre_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), pre_val, thread); ++ __ pop_call_clobbered_registers(); ++ __ bind(done); ++ ++ __ epilogue(); ++} ++ ++void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("shenandoah_load_reference_barrier", false); ++ // arg0 : object to be resolved ++ ++ __ push_call_clobbered_registers(); ++ __ load_parameter(0, x10); ++ __ load_parameter(1, x11); ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 ++ ++address ShenandoahBarrierSetAssembler::shenandoah_lrb() { ++ assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); ++ return _shenandoah_lrb; ++} ++ ++#define __ cgen->assembler()-> ++ ++// Shenandoah load reference barrier. ++// ++// Input: ++// x10: OOP to evacuate. Not null. ++// x11: load address ++// ++// Output: ++// x10: Pointer to evacuated OOP. ++// ++// Trash t0 t1 Preserve everything else. 
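++//
++// Roughly, the stub behaves like the following sketch (not the emitted
++// instruction sequence; the narrow-oop runtime entry is used instead when
++// UseCompressedOops is on):
++//
++//   if (ShenandoahHeap::in_cset_fast_test_addr()[x10 >> region_size_bytes_shift_jint] & 1) {
++//     x10 = ShenandoahRuntime::load_reference_barrier(x10, x11);  // slow path, runtime call
++//   }
++//   return x10;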
++address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { ++ __ align(6); ++ StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); ++ address start = __ pc(); ++ ++ Label slow_path; ++ __ mv(t1, ShenandoahHeap::in_cset_fast_test_addr()); ++ __ srli(t0, x10, ShenandoahHeapRegion::region_size_bytes_shift_jint()); ++ __ add(t1, t1, t0); ++ __ lbu(t1, Address(t1, 0)); ++ __ andi(t0, t1, 1); ++ __ bnez(t0, slow_path); ++ __ ret(); ++ ++ __ bind(slow_path); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ push_call_clobbered_registers(); ++ ++ if (UseCompressedOops) { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); ++ } else { ++ __ mv(ra, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); ++ } ++ __ jalr(ra); ++ __ mv(t0, x10); ++ __ pop_call_clobbered_registers(); ++ __ mv(x10, t0); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++} ++ ++#undef __ ++ ++void ShenandoahBarrierSetAssembler::barrier_stubs_init() { ++ if (ShenandoahLoadRefBarrier) { ++ int stub_code_size = 2048; ++ ResourceMark rm; ++ BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); ++ CodeBuffer buf(bb); ++ StubCodeGenerator cgen(&buf); ++ _shenandoah_lrb = generate_shenandoah_lrb(&cgen); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +new file mode 100644 +index 000000000..84bc55706 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.hpp +@@ -0,0 +1,92 @@ ++/* ++ * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++#define CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#ifdef COMPILER1 ++class LIR_Assembler; ++class ShenandoahPreBarrierStub; ++class ShenandoahLoadReferenceBarrierStub; ++class StubAssembler; ++#endif ++class StubCodeGenerator; ++ ++class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { ++public: ++ static address shenandoah_lrb(); ++ ++ void iu_barrier(MacroAssembler *masm, Register dst, Register tmp); ++ ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); ++ void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register src, Register dst, Register count, RegSet saved_regs); ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2, Register tmp3); ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ virtual void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, bool is_cae, Register result); ++ ++ virtual void barrier_stubs_init(); ++ ++private: ++ ++ static address _shenandoah_lrb; ++ ++ void satb_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ void shenandoah_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); ++ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); ++ void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); ++ ++ address generate_shenandoah_lrb(StubCodeGenerator* cgen); ++}; ++ ++#endif // CPU_RISCV_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +new file mode 100644 +index 000000000..6e310697d +--- /dev/null ++++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoah_riscv64.ad +@@ -0,0 +1,188 @@ ++// ++// Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. 
++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++source_hpp %{ ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++%} ++ ++instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapP_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapN_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapPAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ ++ format %{ ++ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndSwapNAcq_shenandoah" ++ %} ++ ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN_shenandoah" ++ %} ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP_DEF res, TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp, #@compareAndExchangeP_shenandoah" ++ %} ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ true /* is_cae */, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapN_shenandoah" ++ "mv $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. 
++ // Weak is not current supported by ShenandoahBarrierSet::cmpxchg_oop ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ ++ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ++ ins_cost(10 * DEFAULT_COST); ++ ++ effect(TEMP tmp, KILL cr); ++ format %{ ++ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@weakCompareAndSwapP_shenandoah" ++ %} ++ ins_encode %{ ++ Register tmp = $tmp$$Register; ++ __ mv(tmp, $oldval$$Register); // Must not clobber oldval. ++ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, ++ false /* is_cae */, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +new file mode 100644 +index 000000000..96068e637 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globalDefinitions_riscv.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++#define CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP ++ ++const int StackAlignmentInBytes = 16; ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define DEOPTIMIZE_WHEN_PATCHING ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/globals_riscv.hpp b/src/hotspot/cpu/riscv/globals_riscv.hpp +new file mode 100644 +index 000000000..b46661a8f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/globals_riscv.hpp +@@ -0,0 +1,120 @@ ++/* ++ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. All rights reserved. 
++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef CPU_RISCV_GLOBALS_RISCV_HPP
++#define CPU_RISCV_GLOBALS_RISCV_HPP
++
++#include "utilities/globalDefinitions.hpp"
++#include "utilities/macros.hpp"
++
++// Sets the default values for platform dependent flags used by the runtime system.
++// (see globals.hpp)
++
++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this
++
++define_pd_global(bool, ImplicitNullChecks, true);  // Generate code for implicit null checks
++define_pd_global(bool, TrapBasedNullChecks, false);
++define_pd_global(bool, UncommonNullCast, true);  // Uncommon-trap NULLs passed to check cast
++
++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment.
++define_pd_global(intx, CodeEntryAlignment, 64);
++define_pd_global(intx, OptoLoopAlignment, 16);
++define_pd_global(intx, InlineFrequencyCount, 100);
++
++#define DEFAULT_STACK_YELLOW_PAGES (2)
++#define DEFAULT_STACK_RED_PAGES (1)
++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the
++// stack if compiled for unix and LP64. To pass stack overflow tests we need
++// 20 shadow pages.
++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++ ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++ ++define_pd_global(bool, UseMembar, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++ ++define_pd_global(intx, InlineSmallCode, 1000); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ product(bool, UseBlockZeroing, true, \ ++ "Use DC ZVA for block zeroing") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler") \ ++ /* For now we're going to be safe and add the I/O bits to userspace fences. */ \ ++ product(bool, UseConservativeFence, true, \ ++ "Extend i for r and o for w in the pred/succ flags of fence") \ ++ product(bool, AvoidUnalignedAccesses, true, \ ++ "Avoid generating unaligned memory accesses") \ ++ product(intx, EagerArrayCopyThreshold, 128, \ ++ "Threshod of array length by bytes to " \ ++ "trigger the eager array copy") \ ++ range(0, 65535) \ ++ experimental(bool, UseRVV, false, "Use RVV instructions") \ ++ experimental(bool, UseZba, false, "Use Zba instructions") \ ++ experimental(bool, UseZbb, false, "Use Zbb instructions") ++ ++#endif // CPU_RISCV_GLOBALS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/icBuffer_riscv.cpp b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +new file mode 100644 +index 000000000..980b2a81b +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icBuffer_riscv.cpp +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ // 6: auipc + ld + auipc + jalr + address(2 * instruction_size) ++ // 5: auipc + ld + j + address(2 * instruction_size ) ++ return (MacroAssembler::far_branches() ? 6 : 5) * NativeInstruction::instruction_size; ++} ++ ++#define __ masm-> ++ ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ++ assert_cond(code_begin != NULL && entry_point != NULL); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // Note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ ++ address start = __ pc(); ++ Label l; ++ __ ld(t1, l); ++ __ far_jump(ExternalAddress(entry_point)); ++ __ align(wordSize); ++ __ bind(l); ++ __ emit_int64((intptr_t)cached_value); ++ // Only need to invalidate the 1st two instructions - not the whole ic stub ++ ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); ++ assert(__ pc() - start == ic_stub_code_size(), "must be"); ++} ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ NativeJump* jump = nativeJump_at(move->next_instruction_address()); ++ return jump->jump_destination(); ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // The word containing the cached value is at the end of this IC buffer ++ uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); ++ void* o = (void*)*p; ++ return o; ++} +diff --git a/src/hotspot/cpu/riscv/icache_riscv.cpp b/src/hotspot/cpu/riscv/icache_riscv.cpp +new file mode 100644 +index 000000000..ed8022784 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.cpp +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/icache.hpp" ++#include "macroAssembler_riscv.hpp" ++ ++#define __ _masm-> ++ ++static int icache_flush(address addr, int lines, int magic) { ++ // To make a store to instruction memory visible to all RISC-V harts, ++ // the writing hart has to execute a data FENCE before requesting that ++ // all remote RISC-V harts execute a FENCE.I ++ // ++ // No such-assurance is defined at the interface level of the builtin ++ // method, and so we should make sure it works. ++ __asm__ volatile("fence rw, rw" : : : "memory"); ++ ++ __builtin___clear_cache(addr, addr + (lines << ICache::log2_line_size)); ++ return magic; ++} ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ ++ address start = (address)icache_flush; ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++ ++ // ICache::invalidate_range() contains explicit condition that the first ++ // call is invoked on the generated icache flush stub code range. ++ ICache::invalidate_range(start, 0); ++ ++ { ++ StubCodeMark mark(this, "ICache", "fake_stub_for_inlined_icache_flush"); ++ __ ret(); ++ } ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/riscv/icache_riscv.hpp b/src/hotspot/cpu/riscv/icache_riscv.hpp +new file mode 100644 +index 000000000..a503d3be3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/icache_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_ICACHE_RISCV_HPP ++#define CPU_RISCV_ICACHE_RISCV_HPP ++ ++// Interface for updating the instruction cache. 
Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. ++ ++class ICache : public AbstractICache { ++public: ++ enum { ++ stub_size = 16, // Size of the icache flush stub in bytes ++ line_size = BytesPerWord, // conservative ++ log2_line_size = LogBytesPerWord // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_RISCV_ICACHE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +new file mode 100644 +index 000000000..91deb0ae2 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +@@ -0,0 +1,1932 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_riscv.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ // Get method->_constMethod->_result_type ++ ld(t0, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(t0, Address(t0, Method::const_offset())); ++ lbu(t0, Address(t0, ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ mv(t1, T_INT); ++ beq(t0, t1, done); ++ ++ // mask integer result to narrower return type. 
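++  // T_BOOLEAN keeps only bit 0, T_BYTE sign-extends from 8 bits,
++  // T_CHAR zero-extends from 16 bits and T_SHORT sign-extends from 16 bits.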
++ mv(t1, T_BOOLEAN); ++ bne(t0, t1, notBool); ++ ++ andi(result, result, 0x1); ++ j(done); ++ ++ bind(notBool); ++ mv(t1, T_BYTE); ++ bne(t0, t1, notByte); ++ sign_extend(result, result, 8); ++ j(done); ++ ++ bind(notByte); ++ mv(t1, T_CHAR); ++ bne(t0, t1, notChar); ++ zero_extend(result, result, 16); ++ j(done); ++ ++ bind(notChar); ++ sign_extend(result, result, 16); ++ ++ // Nothing to do for T_INT ++ bind(done); ++ addw(result, result, zr); ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry != NULL, "Entry must have been generated by now"); ++ j(entry); ++} ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, ++ // it means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ lwu(t1, Address(xthread, JavaThread::popframe_condition_offset())); ++ andi(t0, t1, JavaThread::popframe_pending_bit); ++ beqz(t0, L); ++ andi(t0, t1, JavaThread::popframe_processing_bit); ++ bnez(t0, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(x10); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ ld(x12, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(x12, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(x12, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(x12, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ++ ld(x10, oop_addr); ++ sd(zr, oop_addr); ++ verify_oop(x10); ++ break; ++ case ltos: ++ ld(x10, val_addr); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lwu(x10, val_addr); ++ break; ++ case ftos: ++ flw(f10, val_addr); ++ break; ++ case dtos: ++ fld(f10, val_addr); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ mvw(t0, (int) ilgl); ++ sw(t0, tos_addr); ++ sw(zr, val_addr); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ beqz(t0, L); // if [thread->jvmti_thread_state() == NULL] then exit ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_state_offset())); ++ mv(t1, JvmtiThreadState::earlyret_pending); ++ bne(t0, t1, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
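++    // The earlyret TOS state is passed as the argument; the entry address is
++    // returned in x10 and jumped through below.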
++ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ lwu(t0, Address(t0, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), t0); ++ jr(x10); ++ bind(L); ++ } ++} ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lhu(reg, Address(xbcp, bcp_offset)); ++ revb_h(reg, reg); ++} ++ ++void InterpreterMacroAssembler::get_dispatch() { ++ int32_t offset = 0; ++ la_patchable(xdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset); ++ addi(xdispatch, xdispatch, offset); ++} ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ load_unsigned_short(index, Address(xbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ lwu(index, Address(xbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ xori(index, index, -1); ++ addw(index, index, zr); ++ } else if (index_size == sizeof(u1)) { ++ load_unsigned_byte(index, Address(xbcp, bcp_offset)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++// Return ++// Rindex: index into constant pool ++// Rcache: address of cache entry - ConstantPoolCache::base_offset() ++// ++// A caller must add ConstantPoolCache::base_offset() to Rcache to get ++// the true address of the cache entry. ++// ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ assert_different_registers(cache, xcpool); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry ++ // riscv already has the cache in xcpool so there is no need to ++ // install it in cache. Instead we pre-add the indexed offset to ++ // xcpool and return it in cache. All clients of this method need to ++ // be modified accordingly. ++ shadd(cache, index, xcpool, cache, 5); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. ++ // n.b. 
unlike x86 cache already includes the index offset ++ la(bytecode, Address(cache, ++ ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::indices_offset())); ++ membar(MacroAssembler::AnyAny); ++ lwu(bytecode, bytecode); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ slli(bytecode, bytecode, XLEN - (shift_count + BitsPerByte)); ++ srli(bytecode, bytecode, XLEN - BitsPerByte); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // Convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ ld(cache, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ // construct pointer to cache entry ++ shadd(cache, tmp, cache, tmp, 2 + LogBytesPerWord); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ ++ get_constant_pool(result); ++ // Load pointer for resolved_references[] objArray ++ ld(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ld(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ addi(index, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ shadd(result, index, result, index, LogBytesPerHeapOop); ++ load_heap_oop(result, Address(result, 0)); ++} ++ ++void InterpreterMacroAssembler::load_resolved_klass_at_offset( ++ Register cpool, Register index, Register klass, Register temp) { ++ shadd(temp, index, cpool, temp, LogBytesPerWord); ++ lhu(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index ++ ld(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses ++ shadd(klass, temp, klass, temp, LogBytesPerWord); ++ ld(klass, Address(klass, Array::base_offset_in_bytes())); ++} ++ ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// x10: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// x12, x15 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) { ++ assert(Rsub_klass != x10, "x10 holds superklass"); ++ assert(Rsub_klass != x12, "x12 holds 2ndary super array length"); ++ assert(Rsub_klass != x15, "x15 holds 2ndary super array scan ptr"); ++ ++ // Profile the not-null value's klass. ++ profile_typecheck(x12, Rsub_klass, x15); // blows x12, reloads x15 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, x10, x12, ok_is_subtype); // blows x12 ++ ++ // Profile the failure of the check. 
++ profile_typecheck_failed(x12); // blows x12 ++} ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, wordSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, Address(esp, 0)); // lw do signed extended ++ addi(esp, esp, wordSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, Address(esp, 0)); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ addi(esp, esp, -wordSize); ++ sd(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ addi(esp, esp, -wordSize); ++ addw(r, r, zr); // signed extended ++ sd(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ addi(esp, esp, -2 * wordSize); ++ sd(zr, Address(esp, wordSize)); ++ sd(r, Address(esp)); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ flw(r, esp, 0); ++ addi(esp, esp, wordSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ fld(r, esp, 0); ++ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ addi(esp, esp, -wordSize); ++ fsw(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ addi(esp, esp, -2 * wordSize); ++ fsd(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: ++ pop_ptr(); ++ verify_oop(x10); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ pop_i(); ++ break; ++ case ltos: ++ pop_l(); ++ break; ++ case ftos: ++ pop_f(); ++ break; ++ case dtos: ++ pop_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::push(TosState state) { ++ switch (state) { ++ case atos: ++ verify_oop(x10); ++ push_ptr(); ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ push_i(); ++ break; ++ case ltos: ++ push_l(); ++ break; ++ case ftos: ++ push_f(); ++ break; ++ case dtos: ++ push_d(); ++ break; ++ case vtos: ++ /* nothing to do */ ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} ++ ++void InterpreterMacroAssembler::load_float(Address src) { ++ flw(f10, src); ++} ++ ++void InterpreterMacroAssembler::load_double(Address src) { ++ fld(f10, src); ++} ++ ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { ++ // set sender sp ++ mv(x30, sp); ++ // record last_sp ++ sd(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method) { ++ prepare_to_jump_from_interpreted(); ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code 
in threads for which the event is enabled. Check here for
++    // interp_only_mode if these events CAN be enabled.
++    lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
++    beqz(t0, run_compiled_code);
++    ld(t0, Address(method, Method::interpreter_entry_offset()));
++    jr(t0);
++    bind(run_compiled_code);
++  }
++
++  ld(t0, Address(method, Method::from_interpreted_offset()));
++  jr(t0);
++}
++
++// The following two routines provide a hook so that an implementation
++// can schedule the dispatch in two parts. amd64 does not do this.
++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
++}
++
++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
++  dispatch_next(state, step);
++}
++
++void InterpreterMacroAssembler::dispatch_base(TosState state,
++                                              address* table,
++                                              bool verifyoop,
++                                              bool generate_poll,
++                                              Register Rs) {
++  // Note the argument Rs: it holds the bytecode used to index the dispatch
++  // table and defaults to t0.
++  if (VerifyActivationFrameSize) {
++    Unimplemented();
++  }
++  if (verifyoop && state == atos) {
++    verify_oop(x10);
++  }
++
++  Label safepoint;
++  address* const safepoint_table = Interpreter::safept_table(state);
++  bool needs_thread_local_poll = generate_poll &&
++    SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
++
++  if (needs_thread_local_poll) {
++    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
++    ld(t1, Address(xthread, Thread::polling_page_offset()));
++    andi(t1, t1, 1 << exact_log2(SafepointMechanism::poll_bit()));
++    bnez(t1, safepoint);
++  }
++  if (table == Interpreter::dispatch_table(state)) {
++    mv(t1, Interpreter::distance_from_dispatch_table(state));
++    add(t1, Rs, t1);
++    shadd(t1, t1, xdispatch, t1, 3);
++  } else {
++    mv(t1, (address)table);
++    shadd(t1, Rs, t1, Rs, 3);
++  }
++  ld(t1, Address(t1));
++  jr(t1);
++
++  if (needs_thread_local_poll) {
++    bind(safepoint);
++    la(t1, ExternalAddress((address)safepoint_table));
++    shadd(t1, Rs, t1, Rs, 3);
++    ld(t1, Address(t1));
++    jr(t1);
++  }
++}
++
++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll, Register Rs) {
++  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll, Rs);
++}
++
++void InterpreterMacroAssembler::dispatch_only_normal(TosState state, Register Rs) {
++  dispatch_base(state, Interpreter::normal_table(state), Rs);
++}
++
++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state, Register Rs) {
++  dispatch_base(state, Interpreter::normal_table(state), false, Rs);
++}
++
++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {
++  // load next bytecode
++  load_unsigned_byte(t0, Address(xbcp, step));
++  add(xbcp, xbcp, step);
++  dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll);
++}
++
++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
++  // load current bytecode
++  lbu(t0, Address(xbcp, 0));
++  dispatch_base(state, table);
++}
++
++// remove activation
++//
++// Unlock the receiver if this is a synchronized method.
++// Unlock any Java monitors from synchronized blocks.
++// Remove the activation from the stack.
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers x13 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into x13 ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ lbu(x13, do_not_unlock_if_synchronized); ++ sb(zr, do_not_unlock_if_synchronized); // reset the flag ++ ++ // get method access flags ++ ld(x11, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ ld(x12, Address(x11, Method::access_flags_offset())); ++ andi(t0, x12, JVM_ACC_SYNCHRONIZED); ++ beqz(t0, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ bnez(x13, no_unlock); ++ ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(fp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ la(c_rarg1, monitor); // address of first monitor ++ ++ ld(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ bnez(x10, unlock); ++ ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ j(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg1); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // x10: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ld(c_rarg1, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ la(x9, monitor_block_bot); // points to word before bottom of ++ // monitor block ++ ++ j(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); ++ ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. ++ ++ push(state); ++ unlock_object(c_rarg1); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } ++ ++ j(restart); ++ } ++ ++ bind(loop); ++ // check if current entry is used ++ add(t0, c_rarg1, BasicObjectLock::obj_offset_in_bytes()); ++ ld(t0, Address(t0, 0)); ++ bnez(t0, exception); ++ ++ add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg1, x9, loop); // check if bottom reached if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ // get sender esp ++ ld(t1, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ ble(t1, t0, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), xthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ++ // restore sender esp ++ mv(esp, t1); ++ // remove frame anchor ++ leave(); ++ // If we're returning to interpreted code we will shortly be ++ // adjusting SP to allow some space for ESP. If we're returning to ++ // compiled code the saved sender SP was saved in sender_sp, so this ++ // restores it. 
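++  // Re-align the machine SP: round esp down to a 16-byte boundary
++  // (StackAlignmentInBytes), as the RISC-V calling convention requires.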
++ andi(sp, esp, -16); ++} ++ ++// Lock object ++// ++// Args: ++// c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::lock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = x10; ++ const Register tmp = c_rarg2; ++ const Register obj_reg = c_rarg3; // Will contain the oop ++ ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); ++ ++ Label slow_case; ++ ++ // Load object pointer into obj_reg c_rarg3 ++ ld(obj_reg, Address(lock_reg, obj_offset)); ++ ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into swap_reg ++ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ ori(swap_reg, t0, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(swap_reg, Address(lock_reg, mark_offset)); ++ ++ assert(lock_offset == 0, ++ "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label fail, fast; ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, fast, &fail); ++ bind(fast); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ j(done); ++ bind(fail); ++ } else { ++ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 7) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (7 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 3 bits clear. ++ // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg ++ sub(swap_reg, swap_reg, sp); ++ mv(t0, (int64_t)(7 - os::vm_page_size())); ++ andr(swap_reg, swap_reg, t0); ++ ++ // Save the test result, for recursive case, the result is zero ++ sd(swap_reg, Address(lock_reg, mark_offset)); ++ ++ if (PrintBiasedLockingStatistics) { ++ bnez(swap_reg, slow_case); ++ atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), ++ t1, t0); ++ } ++ beqz(swap_reg, done); ++ ++ bind(slow_case); ++ ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// t0, t1 (temp regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) ++{ ++ assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = x10; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock ++ // structure Store the BasicLock address into x10 ++ la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ ++ // Load oop into obj_reg(c_rarg3) ++ ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ ++ // Free entry ++ sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Test for recursion ++ beqz(header_reg, done); ++ ++ // Atomic swap back the old header ++ cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, done, /*fallthrough*/NULL); ++ ++ // Call the runtime routine for slow case. ++ sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ beqz(mdp, zero_continue); ++} ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ push_reg(RegSet::of(x10, x11), sp); // save x10, x11 ++ ++ // Test MDO to avoid the call if it is NULL. ++ ld(x10, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ beqz(x10, set_mdp); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), xmethod, xbcp); ++ // x10: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ la(x11, Address(x11, in_bytes(MethodData::data_offset()))); ++ add(x10, x11, x10); ++ sd(x10, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ bind(set_mdp); ++ pop_reg(RegSet::of(x10, x11), sp); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ add(sp, sp, -4 * wordSize); ++ sd(x10, Address(sp, 0)); ++ sd(x11, Address(sp, wordSize)); ++ sd(x12, Address(sp, 2 * wordSize)); ++ sd(x13, Address(sp, 3 * wordSize)); ++ test_method_data_pointer(x13, verify_continue); // If mdp is zero, continue ++ get_method(x11); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
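One note on the fast path in lock_object() further above: since sp is 8-byte aligned and the page size is a power of two, the single AND against (7 - page_size) checks both the alignment and the [sp, sp + page_size) range at once, exactly as the comment there states. A self-contained restatement of that identity, with illustrative names:

    #include <cstdint>

    // Zero result <=> (mark & 7) == 0 and sp <= mark < sp + page_size,
    // assuming sp is 8-byte aligned and page_size is a power of two >= 8.
    bool looks_like_recursive_stack_lock(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
      return ((mark - sp) & (7 - page_size)) == 0;
    }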
++ lh(x12, Address(x13, in_bytes(DataLayout::bci_offset()))); ++ ld(t0, Address(x11, Method::const_offset())); ++ add(x12, x12, t0); ++ la(x12, Address(x12, ConstMethod::codes_offset())); ++ beq(x12, xbcp, verify_continue); ++ // x10: method ++ // xbcp: bcp // xbcp == 22 ++ // x13: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ x11, xbcp, x13); ++ bind(verify_continue); ++ ld(x10, Address(sp, 0)); ++ ld(x11, Address(sp, wordSize)); ++ ld(x12, Address(sp, 2 * wordSize)); ++ ld(x13, Address(sp, 3 * wordSize)); ++ add(sp, sp, 4 * wordSize); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ increment_mdp_data_at(mdp_in, noreg, constant, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ ++ assert_different_registers(t1, t0, mdp_in, reg); ++ ++ Address addr1(mdp_in, constant); ++ Address addr2(t1, 0); ++ Address &addr = addr1; ++ if (reg != noreg) { ++ la(t1, addr1); ++ add(t1, t1, reg); ++ addr = addr2; ++ } ++ ++ if (decrement) { ++ ld(t0, addr); ++ addi(t0, t0, -DataLayout::counter_increment); ++ Label L; ++ bltz(t0, L); // skip store if counter underflow ++ sd(t0, addr); ++ bind(L); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ld(t0, addr); ++ addi(t0, t0, DataLayout::counter_increment); ++ Label L; ++ blez(t0, L); // skip store if counter overflow ++ sd(t0, addr); ++ bind(L); ++ } ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int flags_offset = in_bytes(DataLayout::flags_offset()); ++ // Set the flag ++ lbu(t1, Address(mdp_in, flags_offset)); ++ ori(t1, t1, flag_byte_constant); ++ sb(t1, Address(mdp_in, flags_offset)); ++} ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(t1, Address(mdp_in, offset)); ++ bne(value, t1, not_equal_continue); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(t1, Address(mdp_in, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ add(t1, mdp_in, reg); ++ ld(t1, Address(t1, offset_of_disp)); ++ add(mdp_in, mdp_in, t1); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * 
wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addi(mdp_in, mdp_in, constant); ++ sd(mdp_in, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ++ // save/restore across call_VM ++ addi(sp, sp, -2 * wordSize); ++ sd(zr, Address(sp, 0)); ++ sd(return_bci, Address(sp, wordSize)); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ ld(zr, Address(sp, 0)); ++ ld(return_bci, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++} ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ addi(bumped_count, bumped_count, DataLayout::counter_increment); ++ Label L; ++ // eg: bumped_count=0x7fff ffff ffff ffff + 1 < 0. so we use <= 0; ++ blez(bumped_count, L); // skip store if counter overflow, ++ sd(bumped_count, data); ++ bind(L); ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. 
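The counter helpers above (and the taken-branch bump in profile_taken_branch) deliberately skip the store when a 64-bit count would wrap, so profile counters saturate rather than turning negative. The same policy in plain C++, assuming counter_increment == 1 as the code asserts and a count that starts non-negative:

    #include <cstdint>

    // Saturating MDO counter update, mirroring increment_mdp_data_at().
    void bump_profile_counter(int64_t* counter, bool decrement) {
      int64_t v = *counter;
      if (decrement) {
        if (v - 1 >= 0) *counter = v - 1;      // bltz path: skip the store on underflow
      } else {
        if (v < INT64_MAX) *counter = v + 1;   // blez path: skip the store on wrap to <= 0
      }
    }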
++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ // We are making a call. Increment the count for null receiver. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ j(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper( ++ Register item, Register mdp, Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The item is item[n]. 
Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ j(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ j(done); ++ bind(found_null); ++ } else { ++ bnez(reg2, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beqz(reg2, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ mv(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ j(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// # main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) then [ ++// row[0].incr() ++// goto done ++// ] ++// if (row[0].rec != NULL) then [ ++// # inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[1].rec != NULL) then [ ++// # degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// if (row[2].rec != NULL) then [ ++// count.incr() ++// goto done ++// ] # overflow ++// row[2].init(rec) ++// goto done ++// ] else [ ++// # remember row[1] is empty ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[1].init(rec) ++// goto done ++// ] ++// else [ ++// # remember row[0] is empty ++// if (row[1].rec == rec) then [ ++// row[1].incr() ++// goto done ++// ] ++// if (row[2].rec == rec) then [ ++// row[2].incr() ++// goto done ++// ] ++// row[0].init(rec) ++// goto done ++// ] ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, done, is_virtual_call); ++ ++ bind(done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. 
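The decision tree generated by record_item_in_profile_helper() above (and illustrated by the example state machine comment) amounts to the following update rule; a flat C++ model may make the net effect easier to see. RowModel is an illustrative stand-in for a receiver/count cell pair, not a VM type:

    #include <cstdint>

    struct RowModel { void* receiver; int64_t count; };   // illustrative cell pair

    // Net effect of one receiver-type profile update: bump a matching row,
    // otherwise claim the first empty row, otherwise count it as polymorphic.
    void update_receiver_rows(RowModel* rows, int nrows, void* klass, int64_t* poly_counter) {
      for (int i = 0; i < nrows; i++) {
        if (rows[i].receiver == klass) { rows[i].count++; return; }
      }
      for (int i = 0; i < nrows; i++) {
        if (rows[i].receiver == nullptr) {
          rows[i].receiver = klass;
          rows[i].count = 1;          // DataLayout::counter_increment
          return;
        }
      }
      if (poly_counter != nullptr) (*poly_counter)++;   // the non_profiled_offset case
    }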
++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (uint row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ j(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
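For the case update generated in the code that follows, the address arithmetic is plain array indexing into the MultiBranchData; stated as a small helper whose offset parameters stand for the MultiBranchData layout constants used below:

    #include <cstddef>

    // Byte offset, relative to mdp, of the count cell for switch case 'index'.
    size_t switch_case_count_offset(size_t index, size_t case_array_offset,
                                    size_t per_case_size, size_t relative_count_offset) {
      return case_array_offset + index * per_case_size + relative_count_offset;
    }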
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ mvw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mvw(t0, in_bytes(MultiBranchData::case_array_offset())); ++ Assembler::mul(index, index, reg2); ++ Assembler::add(index, index, t0); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer need to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ } ++ ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ get_method(c_rarg1); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave the result on the top of the stack. ++ push(state); ++ lwu(x13, Address(xthread, JavaThread::interp_only_mode_offset())); ++ beqz(x13, L); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } ++ ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_method(c_rarg1); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ pop(state); ++ } ++} ++ ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
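The helper defined next is the usual invocation/backedge counter test: it increments the 32-bit counter, masks it, and asks the caller to take the overflow path when the masked value reaches zero. A C-style model of that check:

    #include <cstdint>

    // Model of increment_mask_and_jump(): returns true when the caller should
    // jump to 'where' (the stub uses an unconditional j because the target can
    // be out of range for a conditional branch).
    bool counter_hits_threshold(uint32_t* counter, uint32_t increment, uint32_t mask) {
      *counter += increment;
      return (*counter & mask) == 0;
    }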
++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where) { ++ Label done; ++ if (!preloaded) { ++ lwu(tmp1, counter_addr); ++ } ++ add(tmp1, tmp1, increment); ++ sw(tmp1, counter_addr); ++ lwu(tmp2, mask); ++ andr(tmp1, tmp1, tmp2); ++ bnez(tmp1, done); ++ j(*where); // offset is too large so we have to use j instead of beqz here ++ bind(done); ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore rbcp & rlocals pointer since these ++ // are callee saved registers and no blocking/ GC can happen ++ // in leaf calls. ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(t0, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ beqz(t0, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++// interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr, Register tmp) { ++ assert_different_registers(obj, tmp, t0, mdo_addr.base()); ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ bnez(obj, update); ++ orptr(mdo_addr, TypeEntries::null_seen, t0, tmp); ++ j(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); // klass seen before, nothing to ++ // do. The unknown bit may have been ++ // set already but no need to check. ++ ++ andi(t0, obj, TypeEntries::type_unknown); ++ bnez(t0, next); ++ // already unknown. Nothing to do anymore. ++ ++ ld(t0, mdo_addr); ++ beqz(t0, none); ++ mv(tmp, (u1)TypeEntries::null_seen); ++ beq(t0, tmp, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ ld(t0, mdo_addr); ++ xorr(obj, obj, t0); ++ andi(t0, obj, TypeEntries::type_klass_mask); ++ beqz(t0, next); ++ ++ // different than before. Cannot keep accurate profile. ++ orptr(mdo_addr, TypeEntries::type_unknown, t0, tmp); ++ j(next); ++ ++ bind(none); ++ // first time here. Set profile type. 
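Summarising the branches above: each type cell holds a klass pointer with two low flag bits folded in, and the update only ever moves forward (first klass seen, then unknown). A compact C++ model of the transition; the flag values mirror TypeEntries but are assumptions for this sketch, and the real code also re-reads the cell once to tolerate a racing update:

    #include <cstdint>

    const intptr_t NULL_SEEN    = 1;                          // assumed TypeEntries::null_seen
    const intptr_t TYPE_UNKNOWN = 2;                          // assumed TypeEntries::type_unknown
    const intptr_t KLASS_MASK   = ~(NULL_SEEN | TYPE_UNKNOWN);

    // One profile_obj_type() update; 'klass' is 0 for a null receiver and is
    // otherwise an (at least 8-byte aligned) Klass pointer value.
    void update_type_cell(intptr_t* cell, intptr_t klass) {
      if (klass == 0) { *cell |= NULL_SEEN; return; }               // record that null was seen
      intptr_t cur = *cell;
      if (((cur ^ klass) & KLASS_MASK) == 0) return;                // same klass already recorded
      if (cur & TYPE_UNKNOWN) return;                               // already degraded, nothing to do
      if (cur == 0 || cur == NULL_SEEN) { *cell = klass; return; }  // first klass: install it
      *cell |= TYPE_UNKNOWN;                                        // conflicting klass: give up
    }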
++ sd(obj, mdo_addr); ++ ++ bind(next); ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lbu(t0, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); ++ if (is_virtual) { ++ mv(tmp, (u1)DataLayout::virtual_call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } else { ++ mv(tmp, (u1)DataLayout::call_type_data_tag); ++ bne(t0, tmp, profile_continue); ++ } ++ ++ // calculate slot step ++ static int stack_slot_offset0 = in_bytes(TypeEntriesAtCall::stack_slot_offset(0)); ++ static int slot_step = in_bytes(TypeEntriesAtCall::stack_slot_offset(1)) - stack_slot_offset0; ++ ++ // calculate type step ++ static int argument_type_offset0 = in_bytes(TypeEntriesAtCall::argument_type_offset(0)); ++ static int type_step = in_bytes(TypeEntriesAtCall::argument_type_offset(1)) - argument_type_offset0; ++ ++ if (MethodData::profile_arguments()) { ++ Label done, loop, loopEnd, profileArgument, profileReturnType; ++ RegSet pushed_registers; ++ pushed_registers += x15; ++ pushed_registers += x16; ++ pushed_registers += x17; ++ Register mdo_addr = x15; ++ Register index = x16; ++ Register off_to_args = x17; ++ push_reg(pushed_registers, sp); ++ ++ mv(off_to_args, in_bytes(TypeEntriesAtCall::args_data_offset())); ++ mv(t0, TypeProfileArgsLimit); ++ beqz(t0, loopEnd); ++ ++ mv(index, zr); // index < TypeProfileArgsLimit ++ bind(loop); ++ bgtz(index, profileReturnType); ++ mv(t0, (int)MethodData::profile_return()); ++ beqz(t0, profileArgument); // (index > 0 || MethodData::profile_return()) == false ++ bind(profileReturnType); ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ mv(t1, - TypeStackSlotEntries::per_arg_count()); ++ mul(t1, index, t1); ++ add(tmp, tmp, t1); ++ mv(t1, TypeStackSlotEntries::per_arg_count()); ++ add(t0, mdp, off_to_args); ++ blt(tmp, t1, done); ++ ++ bind(profileArgument); ++ ++ ld(tmp, Address(callee, Method::const_offset())); ++ load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ mv(t0, stack_slot_offset0); ++ mv(t1, slot_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(t0, mdp, t0); ++ ld(t0, Address(t0)); ++ sub(tmp, tmp, t0); ++ addi(tmp, tmp, -1); ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ mv(t0, argument_type_offset0); ++ mv(t1, type_step); ++ mul(t1, index, t1); ++ add(t0, t0, t1); ++ add(mdo_addr, mdp, t0); ++ Address mdo_arg_addr(mdo_addr, 0); ++ profile_obj_type(tmp, mdo_arg_addr, t1); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addi(off_to_args, off_to_args, to_add); ++ ++ // increment index by 1 ++ addi(index, index, 1); ++ mv(t1, TypeProfileArgsLimit); ++ blt(index, t1, loop); ++ bind(loopEnd); ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset()))); ++ addi(tmp, tmp, 
-TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); ++ } ++ ++ add(t0, mdp, off_to_args); ++ bind(done); ++ mv(mdp, t0); ++ ++ // unspill the clobbered registers ++ pop_reg(pushed_registers, sp); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. ++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ shadd(mdp, tmp, mdp, tmp, exact_log2(DataLayout::cell_size)); ++ } ++ sd(mdp, Address(fp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, xbcp, t0, t1); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lbu(t0, Address(xbcp, 0)); ++ mv(tmp, (u1)Bytecodes::_invokedynamic); ++ beq(t0, tmp, do_profile); ++ mv(tmp, (u1)Bytecodes::_invokehandle); ++ beq(t0, tmp, do_profile); ++ get_method(tmp); ++ lhu(t0, Address(tmp, Method::intrinsic_id_offset_in_bytes())); ++ mv(t1, vmIntrinsics::_compiledLambdaForm); ++ bne(t0, t1, profile_continue); ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ mv(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr, t1); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3) { ++ assert_different_registers(t0, t1, mdp, tmp1, tmp2, tmp3); ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lwu(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ srli(tmp2, tmp1, 31); ++ bnez(tmp2, profile_continue); // i.e. sign bit set ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ add(mdp, mdp, tmp1); ++ ld(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ int per_arg_scale = exact_log2(DataLayout::cell_size); ++ add(t0, mdp, off_base); ++ add(t1, mdp, type_base); ++ ++ ++ shadd(tmp2, tmp1, t0, tmp2, per_arg_scale); ++ // load offset on the stack from the slot for this parameter ++ ld(tmp2, Address(tmp2, 0)); ++ neg(tmp2, tmp2); ++ ++ // read the parameter from the local area ++ shadd(tmp2, tmp2, xlocals, tmp2, Interpreter::logStackElementSize); ++ ld(tmp2, Address(tmp2, 0)); ++ ++ // profile the parameter ++ shadd(t1, tmp1, t1, t0, per_arg_scale); ++ Address arg_type(t1, 0); ++ profile_obj_type(tmp2, arg_type, tmp3); ++ ++ // go to next parameter ++ add(tmp1, tmp1, - TypeStackSlotEntries::per_arg_count()); ++ bgez(tmp1, loop); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ bnez(mcs, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, Address(method, Method::method_counters_offset())); ++ beqz(mcs, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++#ifdef ASSERT ++void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit) { ++ Label L; ++ andi(t0, access_flags, flag_bits); ++ if (stop_by_hit) { ++ beqz(t0, L); ++ } else { ++ bnez(t0, L); ++ } ++ stop(msg); ++ bind(L); ++} ++ ++void InterpreterMacroAssembler::verify_frame_setup() { ++ Label L; ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ld(t0, monitor_block_top); ++ beq(esp, t0, L); ++ stop("broken stack frame setup in interpreter"); ++ bind(L); ++} ++#endif +diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +new file mode 100644 +index 000000000..042ee8280 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +@@ -0,0 +1,283 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_INTERP_MASM_RISCV_HPP ++#define CPU_RISCV_INTERP_MASM_RISCV_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; ++ ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, ++ bool generate_poll = false, Register Rs = t0); ++ ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} ++ virtual ~InterpreterMacroAssembler() {} ++ ++ void load_earlyret_value(TosState state); ++ ++ void jump_to_entry(address entry); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void restore_bcp() { ++ ld(xbcp, Address(fp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void restore_locals() { ++ ld(xlocals, Address(fp, frame::interpreter_frame_locals_offset * wordSize)); ++ } ++ ++ void restore_constant_pool_cache() { ++ ld(xcpool, Address(fp, frame::interpreter_frame_cache_offset * wordSize)); ++ } ++ ++ void get_dispatch(); ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, Address(fp, frame::interpreter_frame_method_offset * wordSize)); ++ } ++ ++ void get_const(Register reg) { ++ get_method(reg); ++ ld(reg, Address(reg, in_bytes(Method::const_offset()))); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // Load cpool->resolved_references(index). 
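The accessor chain above (get_method, get_const, get_constant_pool, get_constant_pool_cache) is a series of dependent loads through the method metadata. A sketch of that chain with stand-in structs; only the shape of the loads is meant to match:

    // Illustrative stand-ins; the real types are Method, ConstMethod,
    // ConstantPool and ConstantPoolCache.
    struct ConstantPoolCacheModel {};
    struct ConstantPoolModel  { ConstantPoolCacheModel* cache; };
    struct ConstMethodModel   { ConstantPoolModel* constants; };
    struct MethodModel        { ConstMethodModel* const_method; };

    // get_constant_pool_cache(reg): three dependent loads starting from the
    // Method* stored in the interpreter frame's method slot.
    ConstantPoolCacheModel* constant_pool_cache_of(MethodModel* m) {
      return m->const_method->constants->cache;
    }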
++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = x15); ++ ++ // Load cpool->resolved_klass_at(index). ++ void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); ++ ++ void pop_ptr(Register r = x10); ++ void pop_i(Register r = x10); ++ void pop_l(Register r = x10); ++ void pop_f(FloatRegister r = f10); ++ void pop_d(FloatRegister r = f10); ++ void push_ptr(Register r = x10); ++ void push_i(Register r = x10); ++ void push_l(Register r = x10); ++ void push_f(FloatRegister r = f10); ++ void push_d(FloatRegister r = f10); ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(esp, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ } ++ ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++// Load float value from 'address'. The value is loaded onto the FPU register v0. ++ void load_float(Address src); ++ void load_double(Address src); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via t0 ++ void dispatch_only(TosState state, bool generate_poll = false, Register Rs = t0); ++ // dispatch normal table via t0 (assume t0 is loaded already) ++ void dispatch_only_normal(TosState state, Register Rs = t0); ++ void dispatch_only_noverify(TosState state, Register Rs = t0); ++ // load t0 from [xbcp + step] and dispatch via t0 ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load t0 from [xbcp] and dispatch via t0 and table ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. ++ void remove_activation(TosState state, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ ++ // FIXME: Give us a valid frame at a null check. 
++ virtual void null_check(Register reg, int offset = -1) { ++ MacroAssembler::null_check(reg, offset); ++ } ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register tmp1, Register tmp2, ++ bool preloaded, Label* where); ++ ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ // narrow int return value ++ void narrow(Register result); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register t1, ++ bool receiver_can_be_null = false); ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register temp); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register temp); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr, Register tmp); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); ++ ++ // Debugging ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ ++ virtual void _call_Unimplemented(address call_site) { ++ save_bcp(); ++ set_last_Java_frame(esp, fp, (address) pc(), t0); ++ MacroAssembler::_call_Unimplemented(call_site); ++ } ++ ++#ifdef ASSERT ++ void verify_access_flags(Register access_flags, uint32_t flag_bits, ++ const char* msg, bool stop_by_hit = 
true); ++ void verify_frame_setup(); ++#endif ++}; ++ ++#endif // CPU_RISCV_INTERP_MASM_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +new file mode 100644 +index 000000000..777f326e3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.cpp +@@ -0,0 +1,296 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++// Implementation of SignatureHandlerGenerator ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return xlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return sp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return t0; } ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::next_gpr() { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ return g_INTArgReg[++_num_reg_int_args]; ++ } ++ return noreg; ++} ++ ++FloatRegister InterpreterRuntime::SignatureHandlerGenerator::next_fpr() { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ return g_FPArgReg[_num_reg_fp_args++]; ++ } ++ return fnoreg; ++} ++ ++int InterpreterRuntime::SignatureHandlerGenerator::next_stack_offset() { ++ int ret = _stack_offset; ++ _stack_offset += wordSize; ++ return ret; ++} ++ ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); // allocate on resourse area by default ++ _num_reg_int_args = (method->is_static() ? 
1 : 0); ++ _num_reg_fp_args = 0; ++ _stack_offset = 0; ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ lw(reg, src); ++ } else { ++ __ lw(x10, src); ++ __ sw(x10, Address(to(), next_stack_offset())); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ Register reg = next_gpr(); ++ if (reg != noreg) { ++ __ ld(reg, src); ++ } else { ++ __ ld(x10, src); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ flw(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_int(); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ FloatRegister reg = next_fpr(); ++ if (reg != fnoreg) { ++ __ fld(reg, src); ++ } else { ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ pass_long(); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Register reg = next_gpr(); ++ if (reg == c_rarg1) { ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ addi(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); ++ } else if (reg != noreg) { ++ // c_rarg2-c_rarg7 ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ mv(reg, zr); //_num_reg_int_args:c_rarg -> 1:c_rarg2, 2:c_rarg3... 
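The pass_* routines above follow the C calling convention selection for native calls: an integer or pointer slot takes the next free a-register (one integer register is reserved for the JNIEnv, and for static methods the handler also skips the slot the wrapper later fills with the class mirror), a float or double takes the next fa-register, and a floating-point argument with no fa-register left falls back to the integer convention before anything spills to the stack. A condensed model of that choice, using the register counts assumed by the code above:

    enum ArgHome { IN_GPR, IN_FPR, ON_STACK };

    // Where the signature handler places the next argument, given how many
    // integer and floating-point argument registers are already taken.
    ArgHome place_argument(bool is_fp, int& used_gpr, int& used_fpr) {
      const int max_int_args = 8 - 1;   // a1..a7; a0 is reserved for the JNIEnv
      const int max_fp_args  = 8;       // fa0..fa7
      if (is_fp && used_fpr < max_fp_args) { used_fpr++; return IN_FPR; }
      // integer argument, or floating-point argument with no fa-register left
      if (used_gpr < max_int_args) { used_gpr++; return IN_GPR; }
      return ON_STACK;
    }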
++ __ ld(temp(), x10); ++ Label L; ++ __ beqz(temp(), L); ++ __ mv(reg, x10); ++ __ bind(L); ++ } else { ++ //to stack ++ __ addi(x10, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ ld(temp(), x10); ++ Label L; ++ __ bnez(temp(), L); ++ __ mv(x10, zr); ++ __ bind(L); ++ assert(sizeof(jobject) == wordSize, ""); ++ __ sd(x10, Address(to(), next_stack_offset())); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ ++ // return result handler ++ __ la(x10, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ __ ret(); ++ ++ __ flush(); ++} ++ ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _int_args; ++ intptr_t* _fp_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_reg_int_args; ++ unsigned int _num_reg_fp_args; ++ ++ ++ intptr_t* single_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ return from_addr; ++ } ++ ++ intptr_t* double_slot_addr() { ++ intptr_t* from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ return from_addr; ++ } ++ ++ int pass_gpr(intptr_t value) { ++ if (_num_reg_int_args < Argument::n_int_register_parameters_c - 1) { ++ *_int_args++ = value; ++ return _num_reg_int_args++; ++ } ++ return -1; ++ } ++ ++ int pass_fpr(intptr_t value) { ++ if (_num_reg_fp_args < Argument::n_float_register_parameters_c) { ++ *_fp_args++ = value; ++ return _num_reg_fp_args++; ++ } ++ return -1; ++ } ++ ++ void pass_stack(intptr_t value) { ++ *_to++ = value; ++ } ++ ++ ++ virtual void pass_int() { ++ jint value = *(jint*)single_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ ++ virtual void pass_long() { ++ intptr_t value = *double_slot_addr(); ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ virtual void pass_object() { ++ intptr_t* addr = single_slot_addr(); ++ intptr_t value = *addr == 0 ? NULL : (intptr_t)addr; ++ if (pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ virtual void pass_float() { ++ jint value = *(jint*) single_slot_addr(); ++ // a floating-point argument is passed according to the integer calling ++ // convention if no floating-point argument register available ++ if (pass_fpr(value) < 0 && pass_gpr(value) < 0) { ++ pass_stack(value); ++ } ++ } ++ ++ virtual void pass_double() { ++ intptr_t value = *double_slot_addr(); ++ int arg = pass_fpr(value); ++ if (0 <= arg) { ++ *_fp_identifiers |= (1ull << arg); // mark as double ++ } else if (pass_gpr(value) < 0) { // no need to mark if passing by integer registers or stack ++ pass_stack(value); ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ _int_args = to - (method->is_static() ? 16 : 17); ++ _fp_args = to - 8; ++ _fp_identifiers = to - 9; ++ *(int*) _fp_identifiers = 0; ++ _num_reg_int_args = (method->is_static() ? 
1 : 0); ++ _num_reg_fp_args = 0; ++ } ++ ~SlowSignatureHandler() ++ { ++ _from = NULL; ++ _to = NULL; ++ _int_args = NULL; ++ _fp_args = NULL; ++ _fp_identifiers = NULL; ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler ssh(m, (address)from, to); ++ ssh.iterate((uint64_t)UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +new file mode 100644 +index 000000000..06342869f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/interpreterRT_riscv.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_INTERPRETERRT_RISCV_HPP ++#define CPU_RISCV_INTERPRETERRT_RISCV_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_reg_fp_args; ++ unsigned int _num_reg_int_args; ++ int _stack_offset; ++ ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); ++ ++ Register next_gpr(); ++ FloatRegister next_fpr(); ++ int next_stack_offset(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ virtual ~SignatureHandlerGenerator() { ++ _masm = NULL; ++ } ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_RISCV_INTERPRETERRT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +new file mode 100644 +index 000000000..a169b8c5f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/javaFrameAnchor_riscv.hpp +@@ -0,0 +1,89 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++#define CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ assert(src != NULL, "Src should not be NULL."); ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ } ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ const address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* java_sp) { _last_Java_sp = java_sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* java_fp) { OrderAccess::release(); _last_Java_fp = java_fp; } ++ ++#endif // CPU_RISCV_JAVAFRAMEANCHOR_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +new file mode 100644 +index 000000000..9bab8e78f +--- /dev/null ++++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp +@@ -0,0 +1,193 @@ ++/* ++ 
* Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. ++ ++// Common register usage: ++// x10/f10: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id ++ ++static const Register robj = x13; ++static const Register rcounter = x14; ++static const Register roffset = x15; ++static const Register rcounter_addr = x16; ++static const Register result = x17; ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ name = NULL; // unreachable ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ int32_t offset = 0; ++ __ la_patchable(rcounter_addr, SafepointSynchronize::safepoint_counter_addr(), offset); ++ __ addi(rcounter_addr, rcounter_addr, offset); ++ ++ Address safepoint_counter_addr(rcounter_addr, 0); ++ __ lwu(rcounter, safepoint_counter_addr); ++ // An even value means there are no ongoing safepoint operations ++ __ andi(t0, rcounter, 1); ++ __ bnez(t0, slow); ++ __ xorr(robj, c_rarg1, rcounter); ++ __ xorr(robj, robj, rcounter); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is address dependent on rcounter. 
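++  // The two xorr instructions leave robj equal to c_rarg1, but they make the
++  // oop load below data-dependent on the safepoint counter read above, so the
++  // two loads cannot be reordered (this stands in for an explicit LoadLoad
++  // barrier, per the comment near the top of this file). The counter is
++  // re-read after the field load; if it changed, the speculatively loaded
++  // value is discarded and the slow path is taken instead.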
++ ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ assert_cond(bs != NULL); ++ bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, t0, slow); ++ ++ __ srli(roffset, c_rarg2, 2); // offset ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); // Used by the segfault handler ++ __ add(roffset, robj, roffset); ++ switch (type) { ++ case T_BOOLEAN: __ lbu(result, Address(roffset, 0)); break; ++ case T_BYTE: __ lb(result, Address(roffset, 0)); break; ++ case T_CHAR: __ lhu(result, Address(roffset, 0)); break; ++ case T_SHORT: __ lh(result, Address(roffset, 0)); break; ++ case T_INT: __ lw(result, Address(roffset, 0)); break; ++ case T_LONG: __ ld(result, Address(roffset, 0)); break; ++ case T_FLOAT: { ++ __ flw(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_w(result, f28); // f{31--0}-->x ++ break; ++ } ++ case T_DOUBLE: { ++ __ fld(f28, Address(roffset, 0)); // f28 as temporaries ++ __ fmv_x_d(result, f28); // d{63--0}-->x ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++ ++ // counter_addr is address dependent on result. ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ xorr(rcounter_addr, rcounter_addr, result); ++ __ lw(t0, safepoint_counter_addr); ++ __ bne(rcounter, t0, slow); ++ ++ switch (type) { ++ case T_FLOAT: __ fmv_w_x(f10, result); break; ++ case T_DOUBLE: __ fmv_d_x(f10, result); break; ++ default: __ mv(x10, result); break; ++ } ++ __ ret(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind(slow); ++ address slow_case_addr; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ slow_case_addr = NULL; // unreachable ++ } ++ ++ { ++ __ enter(); ++ int32_t tmp_offset = 0; ++ __ la_patchable(t0, ExternalAddress(slow_case_addr), tmp_offset); ++ __ jalr(x1, t0, tmp_offset); ++ __ leave(); ++ __ ret(); ++ } ++ __ flush(); ++ ++ return fast_entry; ++} ++ ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/riscv/jniTypes_riscv.hpp b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +new file mode 100644 +index 000000000..96775e0db +--- 
/dev/null ++++ b/src/hotspot/cpu/riscv/jniTypes_riscv.hpp +@@ -0,0 +1,108 @@ ++/* ++ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_JNITYPES_RISCV_HPP ++#define CPU_RISCV_JNITYPES_RISCV_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : private AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++public: ++ // Ints are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to+1). ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ } ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. 
++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 1 ++ // Doubles are stored in native word format in one JavaCallArgument ++ // slot at *(to+1). ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ } ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_RISCV_JNITYPES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +new file mode 100644 +index 000000000..5d6078bb3 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +@@ -0,0 +1,5861 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/macros.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#endif ++#ifdef COMPILER2 ++#include "oops/oop.hpp" ++#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" ++#include "opto/subnode.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++ ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg) { ++ masm->mv(c_rarg0, arg); ++ } ++} ++ ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg) { ++ masm->mv(c_rarg1, arg); ++ } ++} ++ ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg) { ++ masm->mv(c_rarg2, arg); ++ } ++} ++ ++static void pass_arg3(MacroAssembler* masm, Register arg) { ++ if (c_rarg3 != arg) { ++ masm->mv(c_rarg3, arg); ++ } ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) { nop(); } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); ++} ++ ++// Implementation of call_VM versions ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ ++ pass_arg1(this, arg_1); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, 
++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ pass_arg1(this, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++// these are no-ops overridden by InterpreterMacroAssembler ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) {} ++void MacroAssembler::check_and_handle_popframe(Register java_thread) {} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & esp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register tmp) { ++ ++ if (last_java_pc->is_valid()) { ++ sd(last_java_pc, Address(xthread, ++ JavaThread::frame_anchor_offset() + ++ JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ // determine last_java_sp register ++ if (last_java_sp == sp) { ++ mv(tmp, sp); ++ last_java_sp = tmp; ++ } else if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register tmp) { ++ assert(last_java_pc != NULL, "must provide a valid PC"); ++ ++ la(tmp, last_java_pc); ++ sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register tmp) { ++ if (L.is_bound()) { ++ set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp); ++ } else { ++ L.add_patch_at(code(), locator()); ++ set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp); ++ } ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ // we must set sp to zero to clear frame ++ sd(zr, Address(xthread, JavaThread::last_Java_sp_offset())); ++ ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(zr, Address(xthread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(zr, Address(xthread, JavaThread::last_Java_pc_offset())); ++} ++ ++void 
MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = xthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(java_thread == xthread, "unexpected register"); ++ ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ // push java thread (becomes first argument of C function) ++ mv(c_rarg0, java_thread); ++ ++ // set last Java frame before call ++ assert(last_java_sp != fp, "can't use fp"); ++ ++ Label l; ++ set_last_Java_frame(last_java_sp, fp, l, t0); ++ ++ // do the call, remove parameters ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++ ++ // reset last Java frame ++ // Only interpreter should have to clear fp ++ reset_last_Java_frame(true); ++ ++ // C++ interp handles this in the interpreter ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset()))); ++ Label ok; ++ beqz(t0, ok); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset); ++ jalr(x0, t0, offset); ++ bind(ok); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); ++ } ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(zr, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) { return; } ++ ++ // Pass register number to verify_oop_subroutine ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ } ++ BLOCK_COMMENT("verify_oop {"); ++ ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ mv(c_rarg0, reg); // c_rarg0 : x10 ++ if(b != NULL) { ++ movptr(t0, (uintptr_t)(address)b); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); ++ ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ BLOCK_COMMENT("} verify_oop"); ++} ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ return; ++ } ++ ++ const char* b = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ } ++ 
BLOCK_COMMENT("verify_oop_addr {"); ++ ++ push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ if (addr.uses(sp)) { ++ la(x10, addr); ++ ld(x10, Address(x10, 4 * wordSize)); ++ } else { ++ ld(x10, addr); ++ } ++ if(b != NULL) { ++ movptr(t0, (uintptr_t)(address)b); ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++ // call indirectly to solve generation ordering problem ++ int32_t offset = 0; ++ la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset); ++ ld(t1, Address(t1, offset)); ++ jalr(t1); ++ ++ pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp); ++ ++ BLOCK_COMMENT("} verify_oop_addr"); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ if (arg_slot.is_constant()) { ++ return Address(esp, arg_slot.as_constant() * stackElementSize + offset); ++ } else { ++ assert_different_registers(t0, arg_slot.as_register()); ++ shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize)); ++ return Address(t0, offset); ++ } ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ // In order to get locks to work, we need to fake a in_VM state ++ if (ShowMessageBoxOnError) { ++ JavaThread* thread = JavaThread::current(); ++ JavaThreadState saved_state = thread->thread_state(); ++ thread->set_thread_state(_thread_in_vm); ++#ifndef PRODUCT ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ ttyLocker ttyl; ++ BytecodeCounter::print(); ++ } ++#endif ++ if (os::message_box(msg, "Execution stopped, print registers?")) { ++ ttyLocker ttyl; ++ tty->print_cr(" pc = 0x%016" PRIX64, pc); ++#ifndef PRODUCT ++ tty->cr(); ++ findpc(pc); ++ tty->cr(); ++#endif ++ tty->print_cr(" x0 = 0x%016" PRIx64, regs[0]); ++ tty->print_cr(" x1 = 0x%016" PRIx64, regs[1]); ++ tty->print_cr(" x2 = 0x%016" PRIx64, regs[2]); ++ tty->print_cr(" x3 = 0x%016" PRIx64, regs[3]); ++ tty->print_cr(" x4 = 0x%016" PRIx64, regs[4]); ++ tty->print_cr(" x5 = 0x%016" PRIx64, regs[5]); ++ tty->print_cr(" x6 = 0x%016" PRIx64, regs[6]); ++ tty->print_cr(" x7 = 0x%016" PRIx64, regs[7]); ++ tty->print_cr(" x8 = 0x%016" PRIx64, regs[8]); ++ tty->print_cr(" x9 = 0x%016" PRIx64, regs[9]); ++ tty->print_cr("x10 = 0x%016" PRIx64, regs[10]); ++ tty->print_cr("x11 = 0x%016" PRIx64, regs[11]); ++ tty->print_cr("x12 = 0x%016" PRIx64, regs[12]); ++ tty->print_cr("x13 = 0x%016" PRIx64, regs[13]); ++ tty->print_cr("x14 = 0x%016" PRIx64, regs[14]); ++ tty->print_cr("x15 = 0x%016" PRIx64, regs[15]); ++ tty->print_cr("x16 = 0x%016" PRIx64, regs[16]); ++ tty->print_cr("x17 = 0x%016" PRIx64, regs[17]); ++ tty->print_cr("x18 = 0x%016" PRIx64, regs[18]); ++ tty->print_cr("x19 = 0x%016" PRIx64, regs[19]); ++ tty->print_cr("x20 = 0x%016" PRIx64, regs[20]); ++ tty->print_cr("x21 = 0x%016" PRIx64, regs[21]); ++ tty->print_cr("x22 = 0x%016" PRIx64, regs[22]); ++ tty->print_cr("x23 = 0x%016" PRIx64, regs[23]); ++ tty->print_cr("x24 = 0x%016" PRIx64, regs[24]); ++ tty->print_cr("x25 = 0x%016" PRIx64, regs[25]); ++ tty->print_cr("x26 = 0x%016" PRIx64, regs[26]); ++ tty->print_cr("x27 = 0x%016" PRIx64, regs[27]); 
++ tty->print_cr("x28 = 0x%016" PRIx64, regs[28]); ++ tty->print_cr("x30 = 0x%016" PRIx64, regs[30]); ++ tty->print_cr("x31 = 0x%016" PRIx64, regs[31]); ++ BREAKPOINT; ++ } ++ ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); ++ } else { ++ ttyLocker ttyl; ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++ assert(false, "DEBUG MESSAGE: %s", msg); ++ } ++} ++ ++void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { ++ Label done, not_weak; ++ beqz(value, done); // Use NULL as-is. ++ ++ // Test for jweak tag. ++ andi(t0, value, JNIHandles::weak_tag_mask); ++ beqz(t0, not_weak); ++ ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value, ++ Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ j(done); ++ ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::stop(const char* msg) { ++ address ip = pc(); ++ push_reg(RegSet::range(x0, x31), sp); ++ if(msg != NULL && ip != NULL) { ++ mv(c_rarg0, (uintptr_t)(address)msg); ++ mv(c_rarg1, (uintptr_t)(address)ip); ++ } else { ++ ShouldNotReachHere(); ++ } ++ mv(c_rarg2, sp); ++ mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64)); ++ jalr(c_rarg3); ++ ebreak(); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::emit_static_call_stub() { ++ // CompiledDirectStaticCall::set_to_interpreted knows the ++ // exact layout of this stub. ++ ++ mov_metadata(xmethod, (Metadata*)NULL); ++ ++ // Jump to the entry point of the i2c stub. 
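++  // The Method* above and the branch target below are emitted as NULL/zero
++  // placeholders; CompiledDirectStaticCall::set_to_interpreted fills in the
++  // real values later. A full-length movptr sequence is used so the stub has
++  // a fixed size and layout regardless of the final target address.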
++ int32_t offset = 0; ++ movptr_with_offset(t0, 0, offset); ++ jalr(x0, t0, offset); ++} ++ ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr) { ++ int32_t offset = 0; ++ push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp ++ movptr_with_offset(t0, entry_point, offset); ++ jalr(x1, t0, offset); ++ if (retaddr != NULL) { ++ bind(*retaddr); ++ } ++ pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ pass_arg0(this, arg_0); ++ pass_arg1(this, arg_1); ++ pass_arg2(this, arg_2); ++ call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ assert(arg_0 != c_rarg3, "smashed arg"); ++ assert(arg_1 != c_rarg3, "smashed arg"); ++ assert(arg_2 != c_rarg3, "smashed arg"); ++ pass_arg3(this, arg_3); ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 4); ++} ++ ++void MacroAssembler::nop() { ++ addi(x0, x0, 0); ++} ++ ++void MacroAssembler::mv(Register Rd, Register Rs) { ++ if (Rd != Rs) { ++ addi(Rd, Rs, 0); ++ } ++} ++ ++void MacroAssembler::notr(Register Rd, Register Rs) { ++ xori(Rd, Rs, -1); ++} ++ ++void MacroAssembler::neg(Register Rd, Register Rs) { ++ sub(Rd, x0, Rs); ++} ++ ++void MacroAssembler::negw(Register Rd, Register Rs) { ++ subw(Rd, x0, Rs); ++} ++ ++void MacroAssembler::sext_w(Register Rd, Register Rs) { ++ addiw(Rd, Rs, 0); ++} ++ ++void MacroAssembler::zext_b(Register Rd, Register Rs) { ++ andi(Rd, Rs, 0xFF); ++} ++ ++void MacroAssembler::seqz(Register Rd, Register Rs) { ++ sltiu(Rd, Rs, 1); ++} ++ ++void MacroAssembler::snez(Register Rd, Register Rs) { ++ sltu(Rd, x0, Rs); ++} ++ ++void MacroAssembler::sltz(Register Rd, Register Rs) { ++ slt(Rd, Rs, x0); ++} ++ ++void MacroAssembler::sgtz(Register Rd, Register Rs) { ++ slt(Rd, x0, Rs); ++} ++ ++void 
MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_s(Rd, Rs, Rs); ++ } ++} ++ ++void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_s(Rd, Rs, Rs); ++} ++ ++void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_s(Rd, Rs, Rs); ++} ++ ++void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) { ++ if (Rd != Rs) { ++ fsgnj_d(Rd, Rs, Rs); ++ } ++} ++ ++void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjx_d(Rd, Rs, Rs); ++} ++ ++void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) { ++ fsgnjn_d(Rd, Rs, Rs); ++} ++ ++void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) { ++ vmnand_mm(vd, vs, vs); ++} ++ ++void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) { ++ vnsrl_wx(vd, vs, x0, vm); ++} ++ ++void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) { ++ vfsgnjn_vv(vd, vs, vs); ++} ++ ++void MacroAssembler::la(Register Rd, const address &dest) { ++ int64_t offset = dest - pc(); ++ if (is_offset_in_range(offset, 32)) { ++ auipc(Rd, (int32_t)offset + 0x800); //0x800, Note:the 11th sign bit ++ addi(Rd, Rd, ((int64_t)offset << 52) >> 52); ++ } else { ++ movptr(Rd, dest); ++ } ++} ++ ++void MacroAssembler::la(Register Rd, const Address &adr) { ++ code_section()->relocate(pc(), adr.rspec()); ++ relocInfo::relocType rtype = adr.rspec().reloc()->type(); ++ ++ switch(adr.getMode()) { ++ case Address::literal: { ++ if (rtype == relocInfo::none) { ++ mv(Rd, (intptr_t)(adr.target())); ++ } else { ++ movptr(Rd, adr.target()); ++ } ++ break; ++ } ++ case Address::base_plus_offset:{ ++ Register base = adr.base(); ++ int64_t offset = adr.offset(); ++ if (offset == 0 && Rd != base) { ++ mv(Rd, base); ++ } else if (offset != 0 && Rd != base) { ++ add(Rd, base, offset, Rd); ++ } else if (offset != 0 && Rd == base) { ++ Register tmp = (Rd == t0) ? 
t1 : t0; ++ add(base, base, offset, tmp); ++ } ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::la(Register Rd, Label &label) { ++ la(Rd, target(label)); ++} ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME##z(Register Rs, const address &dest) { \ ++ NAME(Rs, zr, dest); \ ++ } \ ++ void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) { \ ++ NAME(Rs, zr, l, is_far); \ ++ } \ ++ ++ INSN(beq); ++ INSN(bne); ++ INSN(blt); ++ INSN(ble); ++ INSN(bge); ++ INSN(bgt); ++ ++#undef INSN ++ ++// Float compare branch instructions ++ ++#define INSN(NAME, FLOATCMP, BRANCH) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_s(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \ ++ FLOATCMP##_d(t0, Rs1, Rs2); \ ++ BRANCH(t0, l, is_far); \ ++ } ++ ++ INSN(beq, feq, bnez); ++ INSN(bne, feq, beqz); ++#undef INSN ++ ++ ++#define INSN(NAME, FLOATCMP1, FLOATCMP2) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if(is_unordered) { \ ++ FLOATCMP2##_s(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ FLOATCMP1##_s(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ if(is_unordered) { \ ++ FLOATCMP2##_d(t0, Rs2, Rs1); \ ++ beqz(t0, l, is_far); \ ++ } else { \ ++ FLOATCMP1##_d(t0, Rs1, Rs2); \ ++ bnez(t0, l, is_far); \ ++ } \ ++ } ++ ++ INSN(ble, fle, flt); ++ INSN(blt, flt, fle); ++ ++#undef INSN ++ ++#define INSN(NAME, CMP) \ ++ void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ float_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } \ ++ void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \ ++ bool is_far, bool is_unordered) { \ ++ double_##CMP(Rs2, Rs1, l, is_far, is_unordered); \ ++ } ++ ++ INSN(bgt, blt); ++ INSN(bge, ble); ++ ++#undef INSN ++ ++ ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd) { \ ++ csrr(Rd, CSR); \ ++ } ++ ++ INSN(rdinstret, CSR_INSTERT); ++ INSN(rdcycle, CSR_CYCLE); ++ INSN(rdtime, CSR_TIME); ++ INSN(frcsr, CSR_FCSR); ++ INSN(frrm, CSR_FRM); ++ INSN(frflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++void MacroAssembler::csrr(Register Rd, unsigned csr) { ++ csrrs(Rd, csr, x0); ++} ++ ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, Register Rs) { \ ++ OPFUN(x0, csr, Rs); \ ++ } ++ ++ INSN(csrw, csrrw); ++ INSN(csrs, csrrs); ++ INSN(csrc, csrrc); ++ ++#undef INSN ++ ++#define INSN(NAME, OPFUN) \ ++ void MacroAssembler::NAME(unsigned csr, unsigned imm) { \ ++ OPFUN(x0, csr, imm); \ ++ } ++ ++ INSN(csrwi, csrrwi); ++ INSN(csrsi, csrrsi); ++ INSN(csrci, csrrci); ++ ++#undef INSN ++ ++#define INSN(NAME, CSR) \ ++ void MacroAssembler::NAME(Register Rd, Register Rs) { \ ++ csrrw(Rd, CSR, Rs); \ ++ } ++ ++ INSN(fscsr, CSR_FCSR); ++ INSN(fsrm, CSR_FRM); ++ INSN(fsflags, CSR_FFLAGS); ++ ++#undef INSN ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(Register Rs) { \ ++ NAME(x0, Rs); \ ++ } ++ ++ INSN(fscsr); ++ INSN(fsrm); ++ INSN(fsflags); ++ ++#undef INSN ++ ++void MacroAssembler::fsrmi(Register Rd, unsigned imm) { ++ guarantee(imm < 5, "Rounding Mode is invalid in Rounding 
Mode register"); ++ csrrwi(Rd, CSR_FRM, imm); ++} ++ ++void MacroAssembler::fsflagsi(Register Rd, unsigned imm) { ++ csrrwi(Rd, CSR_FFLAGS, imm); ++} ++ ++#define INSN(NAME) \ ++ void MacroAssembler::NAME(unsigned imm) { \ ++ NAME(x0, imm); \ ++ } ++ ++ INSN(fsrmi); ++ INSN(fsflagsi); ++ ++#undef INSN ++ ++#ifdef COMPILER2 ++ ++typedef void (Assembler::*conditional_branch_insn)(Register op1, Register op2, Label& label, bool is_far); ++typedef void (MacroAssembler::*float_conditional_branch_insn)(FloatRegister op1, FloatRegister op2, Label& label, ++ bool is_far, bool is_unordered); ++ ++static conditional_branch_insn conditional_branches[] = ++{ ++ /* SHORT branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgt, ++ NULL, // BoolTest::overflow ++ (conditional_branch_insn)&Assembler::blt, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::ble, ++ NULL, // BoolTest::no_overflow ++ (conditional_branch_insn)&Assembler::bge, ++ ++ /* UNSIGNED branches */ ++ (conditional_branch_insn)&Assembler::beq, ++ (conditional_branch_insn)&Assembler::bgtu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bltu, ++ (conditional_branch_insn)&Assembler::bne, ++ (conditional_branch_insn)&Assembler::bleu, ++ NULL, ++ (conditional_branch_insn)&Assembler::bgeu ++}; ++ ++static float_conditional_branch_insn float_conditional_branches[] = ++{ ++ /* FLOAT SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::float_beq, ++ (float_conditional_branch_insn)&MacroAssembler::float_bgt, ++ NULL, // BoolTest::overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_blt, ++ (float_conditional_branch_insn)&MacroAssembler::float_bne, ++ (float_conditional_branch_insn)&MacroAssembler::float_ble, ++ NULL, // BoolTest::no_overflow ++ (float_conditional_branch_insn)&MacroAssembler::float_bge, ++ ++ /* DOUBLE SHORT branches */ ++ (float_conditional_branch_insn)&MacroAssembler::double_beq, ++ (float_conditional_branch_insn)&MacroAssembler::double_bgt, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_blt, ++ (float_conditional_branch_insn)&MacroAssembler::double_bne, ++ (float_conditional_branch_insn)&MacroAssembler::double_ble, ++ NULL, ++ (float_conditional_branch_insn)&MacroAssembler::double_bge ++}; ++ ++void MacroAssembler::cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(conditional_branches) / sizeof(conditional_branches[0])), ++ "invalid conditional branch index"); ++ (this->*conditional_branches[cmpFlag])(op1, op2, label, is_far); ++} ++ ++// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use ++// unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). ++void MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) { ++ assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), ++ "invalid float conditional branch index"); ++ int booltest_flag = cmpFlag & ~(MacroAssembler::double_branch_mask); ++ (this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far, ++ (booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? 
false : true); ++} ++ ++void MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ case BoolTest::le: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ case BoolTest::gt: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { ++ switch (cmpFlag) { ++ case BoolTest::eq: ++ beqz(op1, L, is_far); ++ break; ++ case BoolTest::ne: ++ bnez(op1, L, is_far); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src) { ++ Label L; ++ cmp_branch(cmpFlag ^ (1 << neg_cond_bits), op1, op2, L); ++ mv(dst, src); ++ bind(L); ++} ++#endif ++ ++void MacroAssembler::push_reg(Register Rs) ++{ ++ addi(esp, esp, 0 - wordSize); ++ sd(Rs, Address(esp, 0)); ++} ++ ++void MacroAssembler::pop_reg(Register Rd) ++{ ++ ld(Rd, esp, 0); ++ addi(esp, esp, wordSize); ++} ++ ++int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) { ++ int count = 0; ++ // Scan bitset to accumulate register pairs ++ for (int reg = 31; reg >= 0; reg --) { ++ if ((1U << 31) & bitset) { ++ regs[count++] = reg; ++ } ++ bitset <<= 1; ++ } ++ return count; ++} ++ ++// Push lots of registers in the bit set supplied. Don't push sp. ++// Return the number of words pushed ++int MacroAssembler::push_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_pushed = 0;) ++ ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; ++ ++ if (count) { ++ addi(stack, stack, - count * wordSize - offset); ++ } ++ for (int i = count - 1; i >= 0; i--) { ++ sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_pushed ++;) ++ } ++ ++ assert(words_pushed == count, "oops, pushed != count"); ++ ++ return count; ++} ++ ++int MacroAssembler::pop_reg(unsigned int bitset, Register stack) { ++ DEBUG_ONLY(int words_popped = 0;) ++ ++ unsigned char regs[32]; ++ int count = bitset_to_regs(bitset, regs); ++ // reserve one slot to align for odd count ++ int offset = is_even(count) ? 0 : wordSize; ++ ++ for (int i = count - 1; i >= 0; i--) { ++ ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset)); ++ DEBUG_ONLY(words_popped ++;) ++ } ++ ++ if (count) { ++ addi(stack, stack, count * wordSize + offset); ++ } ++ assert(words_popped == count, "oops, popped != count"); ++ ++ return count; ++} ++ ++RegSet MacroAssembler::call_clobbered_registers() { ++ // Push integer registers x7, x10-x17, x28-x31. ++ return RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31); ++} ++ ++void MacroAssembler::push_call_clobbered_registers() { ++ push_reg(call_clobbered_registers(), sp); ++ ++ // Push float registers f0-f7, f10-f17, f28-f31. 
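++  // That is 8 + 8 + 4 = 20 registers, hence the 20-word stack adjustment
++  // below; pop_call_clobbered_registers() restores them and releases the
++  // same 20 words.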
++ addi(sp, sp, - wordSize * 20); ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fsd(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++} ++ ++void MacroAssembler::pop_call_clobbered_registers() { ++ int offset = 0; ++ for (int i = 0; i < 32; i++) { ++ if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) { ++ fld(as_FloatRegister(i), Address(sp, wordSize * (offset ++))); ++ } ++ } ++ addi(sp, sp, wordSize * 20); ++ ++ pop_reg(call_clobbered_registers(), sp); ++} ++ ++void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) { ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ push_reg(RegSet::range(x5, x31), sp); ++ ++ // float registers ++ addi(sp, sp, - 32 * wordSize); ++ for (int i = 0; i < 32; i++) { ++ fsd(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ ++ // vector registers ++ if (save_vectors) { ++ sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers); ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ add(t0, sp, vector_size_in_bytes * i); ++ vse64_v(as_VectorRegister(i), t0); ++ } ++ } ++} ++ ++void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) { ++ // vector registers ++ if (restore_vectors) { ++ vsetvli(t0, x0, Assembler::e64, Assembler::m8); ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) { ++ vle64_v(as_VectorRegister(i), sp); ++ add(sp, sp, vector_size_in_bytes * 8); ++ } ++ } ++ ++ // float registers ++ for (int i = 0; i < 32; i++) { ++ fld(as_FloatRegister(i), Address(sp, i * wordSize)); ++ } ++ addi(sp, sp, 32 * wordSize); ++ ++ // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4) ++ pop_reg(RegSet::range(x5, x31), sp); ++} ++ ++static int patch_offset_in_jal(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1); // offset[20] ==> branch[31] ++ Assembler::patch(branch, 30, 21, (offset >> 1) & 0x3ff); // offset[10:1] ==> branch[30:21] ++ Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1); // offset[11] ==> branch[20] ++ Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff); // offset[19:12] ==> branch[19:12] ++ return NativeInstruction::instruction_size; // only one instruction ++} ++ ++static int patch_offset_in_conditional_branch(address branch, int64_t offset) { ++ assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n"); ++ Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1); // offset[12] ==> branch[31] ++ Assembler::patch(branch, 30, 25, (offset >> 5) & 0x3f); // offset[10:5] ==> branch[30:25] ++ Assembler::patch(branch, 7, 7, (offset >> 11) & 0x1); // offset[11] ==> branch[7] ++ Assembler::patch(branch, 11, 8, (offset >> 1) & 0xf); // offset[4:1] ==> branch[11:8] ++ return NativeInstruction::instruction_size; // only one instruction ++} ++ ++static int patch_offset_in_pc_relative(address branch, int64_t offset) { ++ const int PC_RELATIVE_INSTRUCTION_NUM = 2; // auipc, addi/jalr/load ++ Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff); // Auipc. 
offset[31:12] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, offset & 0xfff); // Addi/Jalr/Load. offset[11:0] ==> branch[31:20] ++ return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size; ++} ++ ++static int patch_addr_in_movptr(address branch, address target) { ++ const int MOVPTR_INSTRUCTIONS_NUM = 6; // lui + addi + slli + addi + slli + addi/jalr/load ++ int32_t lower = ((intptr_t)target << 35) >> 35; ++ int64_t upper = ((intptr_t)target - lower) >> 29; ++ Assembler::patch(branch + 0, 31, 12, upper & 0xfffff); // Lui. target[48:29] + target[28] ==> branch[31:12] ++ Assembler::patch(branch + 4, 31, 20, (lower >> 17) & 0xfff); // Addi. target[28:17] ==> branch[31:20] ++ Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff); // Addi. target[16: 6] ==> branch[31:20] ++ Assembler::patch(branch + 20, 31, 20, lower & 0x3f); // Addi/Jalr/Load. target[ 5: 0] ==> branch[31:20] ++ return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} ++ ++static int patch_imm_in_li64(address branch, address target) { ++ const int LI64_INSTRUCTIONS_NUM = 8; // lui + addi + slli + addi + slli + addi + slli + addi ++ int64_t lower = (intptr_t)target & 0xffffffff; ++ lower = lower - ((lower << 44) >> 44); ++ int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower; ++ int32_t upper = (tmp_imm - (int32_t)lower) >> 32; ++ int64_t tmp_upper = upper, tmp_lower = upper; ++ tmp_lower = (tmp_lower << 52) >> 52; ++ tmp_upper -= tmp_lower; ++ tmp_upper >>= 12; ++ // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1), ++ // upper = target[63:32] + 1. ++ Assembler::patch(branch + 0, 31, 12, tmp_upper & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, tmp_lower & 0xfff); // Addi. ++ // Load the rest 32 bits. ++ Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff); // Addi. ++ Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff); // Addi. ++ Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff); // Addi. ++ return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} ++ ++static int patch_imm_in_li32(address branch, int32_t target) { ++ const int LI32_INSTRUCTIONS_NUM = 2; // lui + addiw ++ int64_t upper = (intptr_t)target; ++ int32_t lower = (((int32_t)target) << 20) >> 20; ++ upper -= lower; ++ upper = (int32_t)upper; ++ Assembler::patch(branch + 0, 31, 12, (upper >> 12) & 0xfffff); // Lui. ++ Assembler::patch(branch + 4, 31, 20, lower & 0xfff); // Addiw. 
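++  // Because addiw sign-extends its 12-bit immediate, the constant given to
++  // lui is biased by subtracting the sign-extended low 12 bits first.
++  // For example, for target 0x12345FFF: lower = -1, upper = 0x12346000, and
++  // executing "lui; addiw -1" yields 0x12346000 - 1 = 0x12345FFF again.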
++ return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size; ++} ++ ++static long get_offset_of_jal(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ long offset = 0; ++ unsigned insn = *(unsigned*)insn_addr; ++ long val = (long)Assembler::sextract(insn, 31, 12); ++ offset |= ((val >> 19) & 0x1) << 20; ++ offset |= (val & 0xff) << 12; ++ offset |= ((val >> 8) & 0x1) << 11; ++ offset |= ((val >> 9) & 0x3ff) << 1; ++ offset = (offset << 43) >> 43; ++ return offset; ++} ++ ++static long get_offset_of_conditional_branch(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ unsigned insn = *(unsigned*)insn_addr; ++ offset = (long)Assembler::sextract(insn, 31, 31); ++ offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11); ++ offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5); ++ offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1); ++ offset = (offset << 41) >> 41; ++ return offset; ++} ++ ++static long get_offset_of_pc_relative(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12; // Auipc. ++ offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addi/Jalr/Load. ++ offset = (offset << 32) >> 32; ++ return offset; ++} ++ ++static address get_target_of_movptr(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)); // Addi/Jalr/Load. ++ return (address) target_address; ++} ++ ++static address get_target_of_li64(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8; // Addi. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20)); // Addi. ++ return (address)target_address; ++} ++ ++static address get_target_of_li32(address insn_addr) { ++ assert_cond(insn_addr != NULL); ++ intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12; // Lui. ++ target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)); // Addiw. ++ return (address)target_address; ++} ++ ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. 
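++// The branch argument points at the first instruction of one of the emitted
++// forms (jal, conditional branch, auipc-based pc-relative pair, movptr, li64,
++// li32); the form is recognized from the instruction bits and only its
++// immediate fields are rewritten, so the length of the sequence never changes.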
++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ assert_cond(branch != NULL); ++ int64_t offset = target - branch; ++ if (NativeInstruction::is_jal_at(branch)) { // jal ++ return patch_offset_in_jal(branch, offset); ++ } else if (NativeInstruction::is_branch_at(branch)) { // beq/bge/bgeu/blt/bltu/bne ++ return patch_offset_in_conditional_branch(branch, offset); ++ } else if (NativeInstruction::is_pc_relative_at(branch)) { // auipc, addi/jalr/load ++ return patch_offset_in_pc_relative(branch, offset); ++ } else if (NativeInstruction::is_movptr_at(branch)) { // movptr ++ return patch_addr_in_movptr(branch, target); ++ } else if (NativeInstruction::is_li64_at(branch)) { // li64 ++ return patch_imm_in_li64(branch, target); ++ } else if (NativeInstruction::is_li32_at(branch)) { // li32 ++ int64_t imm = (intptr_t)target; ++ return patch_imm_in_li32(branch, (int32_t)imm); ++ } else { ++ tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch); ++ ShouldNotReachHere(); ++ } ++ return -1; ++} ++ ++address MacroAssembler::target_addr_for_insn(address insn_addr) { ++ long offset = 0; ++ assert_cond(insn_addr != NULL); ++ if (NativeInstruction::is_jal_at(insn_addr)) { // jal ++ offset = get_offset_of_jal(insn_addr); ++ } else if (NativeInstruction::is_branch_at(insn_addr)) { // beq/bge/bgeu/blt/bltu/bne ++ offset = get_offset_of_conditional_branch(insn_addr); ++ } else if (NativeInstruction::is_pc_relative_at(insn_addr)) { // auipc, addi/jalr/load ++ offset = get_offset_of_pc_relative(insn_addr); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { // movptr ++ return get_target_of_movptr(insn_addr); ++ } else if (NativeInstruction::is_li64_at(insn_addr)) { // li64 ++ return get_target_of_li64(insn_addr); ++ } else if (NativeInstruction::is_li32_at(insn_addr)) { // li32 ++ return get_target_of_li32(insn_addr); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return address(((uintptr_t)insn_addr + offset)); ++} ++ ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ // OOPs are either narrow (32 bits) or wide (48 bits). We encode ++ // narrow OOPs by setting the upper 16 bits in the first ++ // instruction. 
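++ // Sketch of the two cases handled below (instruction counts taken from the helpers above):
++ //   narrow oop: li32   dst, CompressedOops::encode(o)  -> 2 insns, patch_imm_in_li32
++ //   wide oop:   movptr dst, (intptr_t)o                -> 6 insns, patch_addr_in_movptr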
++ if (NativeInstruction::is_li32_at(insn_addr)) { ++ // Move narrow OOP ++ narrowOop n = CompressedOops::encode((oop)o); ++ return patch_imm_in_li32(insn_addr, (int32_t)n); ++ } else if (NativeInstruction::is_movptr_at(insn_addr)) { ++ // Move wide OOP ++ return patch_addr_in_movptr(insn_addr, o); ++ } ++ ShouldNotReachHere(); ++ return -1; ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops) { ++ if (Universe::is_fully_initialized()) { ++ mv(xheapbase, Universe::narrow_ptrs_base()); ++ } else { ++ int32_t offset = 0; ++ la_patchable(xheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()), offset); ++ ld(xheapbase, Address(xheapbase, offset)); ++ } ++ } ++} ++ ++void MacroAssembler::mv(Register Rd, Address dest) { ++ assert(dest.getMode() == Address::literal, "Address mode should be Address::literal"); ++ code_section()->relocate(pc(), dest.rspec()); ++ movptr(Rd, dest.target()); ++} ++void MacroAssembler::mv(Register Rd, RegisterOrConstant src) { ++ if (src.is_register()) { ++ mv(Rd, src.as_register()); ++ } else { ++ mv(Rd, src.as_constant()); ++ } ++} ++ ++void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) { ++ andr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} ++ ++void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) { ++ orr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} ++ ++void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) { ++ xorr(Rd, Rs1, Rs2); ++ // addw: The result is clipped to 32 bits, then the sign bit is extended, ++ // and the result is stored in Rd ++ addw(Rd, Rd, zr); ++} ++ ++// Note: load_unsigned_short used to be called load_unsigned_word. ++int MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ int off = offset(); ++ lhu(dst, src); ++ return off; ++} ++ ++int MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ int off = offset(); ++ lbu(dst, src); ++ return off; ++} ++ ++int MacroAssembler::load_signed_short(Register dst, Address src) { ++ int off = offset(); ++ lh(dst, src); ++ return off; ++} ++ ++int MacroAssembler::load_signed_byte(Register dst, Address src) { ++ int off = offset(); ++ lb(dst, src); ++ return off; ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: is_signed ? lw(dst, src) : lwu(dst, src); break; ++ case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; ++ case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// rotate right with imm bits ++void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp) ++{ ++ if (UseZbb) { ++ rori(dst, src, shift); ++ return; ++ } ++ ++ assert_different_registers(dst, tmp); ++ assert_different_registers(src, tmp); ++ assert(shift < 64, "shift amount must be < 64"); ++ slli(tmp, src, 64 - shift); ++ srli(dst, src, shift); ++ orr(dst, dst, tmp); ++} ++ ++// reverse bytes in halfword in lower 16 bits and sign-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits) ++void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ slli(Rd, Rs, 56); ++ srai(Rd, Rd, 48); // sign-extend ++ orr(Rd, Rd, tmp); ++} ++ ++// reverse bytes in lower word and sign-extend ++// Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits) ++void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srai(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_w_u(Rd, Rs, tmp1, tmp2); ++ slli(tmp2, Rd, 48); ++ srai(tmp2, tmp2, 32); // sign-extend ++ srli(Rd, Rd, 16); ++ orr(Rd, Rd, tmp2); ++} ++ ++// reverse bytes in halfword in lower 16 bits and zero-extend ++// Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ srli(Rd, Rd, 48); ++ return; ++ } ++ assert_different_registers(Rs, tmp); ++ assert_different_registers(Rd, tmp); ++ srli(tmp, Rs, 8); ++ andi(tmp, tmp, 0xFF); ++ andi(Rd, Rs, 0xFF); ++ slli(Rd, Rd, 8); ++ orr(Rd, Rd, tmp); ++} ++ ++// reverse bytes in halfwords in lower 32 bits and zero-extend ++// Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits) ++void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ srli(tmp2, Rs, 16); ++ revb_h_h_u(tmp2, tmp2, tmp1); ++ revb_h_h_u(Rd, Rs, tmp1); ++ slli(tmp2, tmp2, 16); ++ orr(Rd, Rd, tmp2); ++} ++ ++// This method is only used for revb_h ++// Rd = Rs[47:0] Rs[55:48] Rs[63:56] ++void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1); ++ srli(tmp1, Rs, 48); ++ andi(tmp2, tmp1, 0xFF); ++ slli(tmp2, tmp2, 8); ++ srli(tmp1, tmp1, 8); ++ orr(tmp1, tmp1, tmp2); ++ slli(Rd, Rs, 16); ++ orr(Rd, Rd, tmp1); ++} ++// reverse bytes in each halfword ++// Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] ++void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if 
(UseZbb) { ++ assert_different_registers(Rs, tmp1); ++ assert_different_registers(Rd, tmp1); ++ rev8(Rd, Rs); ++ zero_extend(tmp1, Rd, 32); ++ roriw(tmp1, tmp1, 16); ++ slli(tmp1, tmp1, 32); ++ srli(Rd, Rd, 32); ++ roriw(Rd, Rd, 16); ++ zero_extend(Rd, Rd, 32); ++ orr(Rd, Rd, tmp1); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb_h_helper(Rd, Rs, tmp1, tmp2); ++ for (int i = 0; i < 3; ++i) { ++ revb_h_helper(Rd, Rd, tmp1, tmp2); ++ } ++} ++ ++// reverse bytes in each word ++// Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] ++void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ rori(Rd, Rd, 32); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ revb(Rd, Rs, tmp1, tmp2); ++ ror_imm(Rd, Rd, 32); ++} ++ ++// reverse bytes in doubleword ++// Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56] ++void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) { ++ if (UseZbb) { ++ rev8(Rd, Rs); ++ return; ++ } ++ assert_different_registers(Rs, tmp1, tmp2); ++ assert_different_registers(Rd, tmp1, tmp2); ++ andi(tmp1, Rs, 0xFF); ++ slli(tmp1, tmp1, 8); ++ for (int step = 8; step < 56; step += 8) { ++ srli(tmp2, Rs, step); ++ andi(tmp2, tmp2, 0xFF); ++ orr(tmp1, tmp1, tmp2); ++ slli(tmp1, tmp1, 8); ++ } ++ srli(Rd, Rs, 56); ++ andi(Rd, Rd, 0xFF); ++ orr(Rd, tmp1, Rd); ++} ++ ++void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) { ++ if (is_imm_in_range(imm, 12, 0)) { ++ and_imm12(Rd, Rn, imm); ++ } else { ++ assert_different_registers(Rn, tmp); ++ mv(tmp, imm); ++ andr(Rd, Rn, tmp); ++ } ++} ++ ++void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) { ++ ld(tmp1, adr); ++ if (src.is_register()) { ++ orr(tmp1, tmp1, src.as_register()); ++ } else { ++ if(is_imm_in_range(src.as_constant(), 12, 0)) { ++ ori(tmp1, tmp1, src.as_constant()); ++ } else { ++ assert_different_registers(tmp1, tmp2); ++ mv(tmp2, src.as_constant()); ++ orr(tmp1, tmp1, tmp2); ++ } ++ } ++ sd(tmp1, adr); ++} ++ ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) { ++ if (UseCompressedClassPointers) { ++ lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ if (Universe::narrow_klass_base() == NULL) { ++ slli(tmp, tmp, Universe::narrow_klass_shift()); ++ beq(trial_klass, tmp, L); ++ return; ++ } ++ decode_klass_not_null(tmp); ++ } else { ++ ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); ++ } ++ beq(trial_klass, tmp, L); ++} ++ ++// Move an oop into a register. immediate is true if we want ++// immediate instrcutions, i.e. we are not going to patch this ++// instruction while the code is being executed by another thread. In ++// that case we can use move immediates rather than the constant pool. 
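++// A minimal usage sketch (the register and jobject below are arbitrary examples):
++//   movoop(x10, obj, /*immediate*/ true);   // mv/movptr immediate sequence
++//   movoop(x10, obj, /*immediate*/ false);  // load through a nearby constant slot
++// The non-immediate form is the one to use when the oop may be patched while
++// another thread is executing this code, as described above.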
++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_oop_index(obj); ++ } else { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ if (!immediate) { ++ address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address ++ ld_constant(dst, Address(dummy, rspec)); ++ } else ++ mv(dst, Address((address)obj, rspec)); ++} ++ ++// Move a metadata address into a register. ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj == NULL) { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } else { ++ oop_index = oop_recorder()->find_index(obj); ++ } ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ mv(dst, Address((address)obj, rspec)); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) { ++ assert_different_registers(tmp, size, t0); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. ++ mv(t0, os::vm_page_size()); ++ Label loop; ++ bind(loop); ++ sub(tmp, sp, t0); ++ subw(size, size, t0); ++ sd(size, Address(tmp)); ++ bgtz(size, loop); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down to and including i=StackShadowPages. ++ for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ sub(tmp, tmp, os::vm_page_size()); ++ sd(size, Address(tmp, 0)); ++ } ++} ++ ++SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) { ++ int32_t offset = 0; ++ _masm = masm; ++ _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset); ++ _masm->lbu(t0, Address(t0, offset)); ++ _masm->beqz(t0, _label); ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++ _masm = NULL; ++} ++ ++void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld(dst, Address(xmethod, Method::const_offset())); ++ ld(dst, Address(dst, ConstMethod::constants_offset())); ++ ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); ++ ld(dst, Address(dst, mirror_offset)); ++ resolve_oop_handle(dst, tmp); ++} ++ ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
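++ // i.e. an OopHandle is a pointer to an oop slot; the access below performs,
++ // in effect, result = *(oop*)result with any GC load barrier applied (sketch).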
++ assert_different_registers(result, tmp); ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ ld(zr, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register tmp2, Register tmp3) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); ++ } ++} ++ ++// Algorithm must match CompressedOops::encode. ++void MacroAssembler::encode_heap_oop(Register d, Register s) { ++ verify_oop(s, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, s, LogMinObjAlignmentInBytes); ++ } else { ++ mv(d, s); ++ } ++ } else { ++ Label notNull; ++ sub(d, s, xheapbase); ++ bgez(d, notNull); ++ mv(d, zr); ++ bind(notNull); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srli(d, d, Universe::narrow_oop_shift()); ++ } ++ } ++} ++ ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ld(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ // FIXME: Should this be a store release? concurrent gcs assumes ++ // klass length is valid if klass field is not null. 
++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ sw(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } else { ++ sd(src, Address(dst, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ // Store to klass gap in destination ++ sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes())); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ decode_klass_not_null(r, r); ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slli(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ mv(dst, src); ++ } ++ return; ++ } ++ ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } ++ ++ assert_different_registers(src, xbase); ++ mv(xbase, (uintptr_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert_different_registers(t0, xbase); ++ shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes); ++ } else { ++ add(dst, xbase, src); ++ } ++ if (xbase == xheapbase) { reinit_heapbase(); } ++ ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ encode_klass_not_null(r, r); ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) { ++ assert(UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (Universe::narrow_klass_base() == NULL) { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ mv(dst, src); ++ } ++ return; ++ } ++ ++ if (((uint64_t)(uintptr_t)Universe::narrow_klass_base() & 0xffffffff) == 0 && ++ Universe::narrow_klass_shift() == 0) { ++ zero_extend(dst, src, 32); ++ return; ++ } ++ ++ Register xbase = dst; ++ if (dst == src) { ++ xbase = tmp; ++ } ++ ++ assert_different_registers(src, xbase); ++ mv(xbase, (intptr_t)Universe::narrow_klass_base()); ++ sub(dst, src, xbase); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srli(dst, dst, LogKlassAlignmentInBytes); ++ } ++ if (xbase == xheapbase) { ++ reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ decode_heap_oop_not_null(r, r); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert(UseCompressedOops, "should only be used for compressed headers"); ++ assert(Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
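++ // Sketch of the inverse of CompressedOops::encode performed below:
++ //   oop = narrow_oop_base + ((uint64_t)narrow << narrow_oop_shift)
++ // The NULL check is omitted here because callers guarantee a non-null value;
++ // the full decode_heap_oop further down handles NULL explicitly.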
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slli(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ add(dst, xheapbase, dst); ++ } ++ } else { ++ assert(Universe::narrow_oop_base() == NULL, "sanity"); ++ mv(dst, src); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register d, Register s) { ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0 || d != s) { ++ slli(d, s, Universe::narrow_oop_shift()); ++ } ++ } else { ++ Label done; ++ mv(d, s); ++ beqz(s, done); ++ shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes); ++ bind(done); ++ } ++ verify_oop(d, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, Register tmp3, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3); ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); ++} ++ ++int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java idiv and irem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) ++ ++ ++ int idivl_offset = offset(); ++ if (!want_remainder) { ++ divw(result, rs1, rs2); ++ } else { ++ remw(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivl_offset; ++} ++ ++int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder) ++{ ++ // Full implementation of Java ldiv and lrem. The function ++ // returns the (pc) offset of the div instruction - may be needed ++ // for implicit exceptions. ++ // ++ // input : rs1: dividend ++ // rs2: divisor ++ // ++ // result: either ++ // quotient (= rs1 idiv rs2) ++ // remainder (= rs1 irem rs2) ++ ++ int idivq_offset = offset(); ++ if (!want_remainder) { ++ div(result, rs1, rs2); ++ } else { ++ rem(result, rs1, rs2); // result = rs1 % rs2; ++ } ++ return idivq_offset; ++} ++ ++// Look up the method for a megamorpic invkkeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. 
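++// A typical call site looks roughly like this (register choices are only an example;
++// itable_index may be a constant or the method_result register):
++//   lookup_interface_method(x10 /*recv_klass*/, x11 /*intf_klass*/, itable_index,
++//                           xmethod /*method_result*/, t2 /*scan_tmp*/,
++//                           L_no_such_interface);
++// The scan below walks the itableOffsetEntry array that follows the vtable,
++// comparing each entry's interface against intf_klass until it finds a match
++// or a NULL terminator.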
++void MacroAssembler::lookup_interface_method(Register recv_klass,
++                                             Register intf_klass,
++                                             RegisterOrConstant itable_index,
++                                             Register method_result,
++                                             Register scan_tmp,
++                                             Label& L_no_such_interface,
++                                             bool return_method) {
++  assert_different_registers(recv_klass, intf_klass, scan_tmp);
++  assert_different_registers(method_result, intf_klass, scan_tmp);
++  assert(recv_klass != method_result || !return_method,
++         "recv_klass can be destroyed when method isn't needed");
++  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
++         "caller must use same register for non-constant itable index as for method");
++
++  // Compute start of first itableOffsetEntry (which is at the end of the vtable).
++  int vtable_base = in_bytes(Klass::vtable_start_offset());
++  int itentry_off = itableMethodEntry::method_offset_in_bytes();
++  int scan_step = itableOffsetEntry::size() * wordSize;
++  int vte_size = vtableEntry::size_in_bytes();
++  assert(vte_size == wordSize, "else adjust times_vte_scale");
++
++  lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
++
++  // %%% Could store the aligned, prescaled offset in the klassoop.
++  shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
++  add(scan_tmp, scan_tmp, vtable_base);
++
++  if (return_method) {
++    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
++    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
++    if (itable_index.is_register()) {
++      slli(t0, itable_index.as_register(), 3);
++    } else {
++      mv(t0, itable_index.as_constant() << 3);
++    }
++    add(recv_klass, recv_klass, t0);
++    if (itentry_off) {
++      add(recv_klass, recv_klass, itentry_off);
++    }
++  }
++
++  Label search, found_method;
++
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  beq(intf_klass, method_result, found_method);
++  bind(search);
++  // Check that the previous entry is non-null. A null entry means that
++  // the receiver class doesn't implement the interface, and wasn't the
++  // same as when the caller was compiled.
++  beqz(method_result, L_no_such_interface, /* is_far */ true);
++  addi(scan_tmp, scan_tmp, scan_step);
++  ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
++  bne(intf_klass, method_result, search);
++
++  bind(found_method);
++
++  // Got a hit.
++ if (return_method) { ++ lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes())); ++ add(method_result, recv_klass, scan_tmp); ++ ld(method_result, Address(method_result)); ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, ++ "adjust the scaling in the code below"); ++ int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); ++ ++ if (vtable_index.is_register()) { ++ shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord); ++ ld(method_result, Address(method_result, vtable_offset_in_bytes)); ++ } else { ++ vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; ++ Address addr = form_address(recv_klass, /* base */ ++ vtable_offset_in_bytes, /* offset */ ++ 12, /* expect offset bits */ ++ method_result); /* temp reg */ ++ ld(method_result, addr); ++ } ++} ++ ++void MacroAssembler::membar(uint32_t order_constraint) { ++ if (!os::is_MP()) { return; } ++ ++ address prev = pc() - NativeMembar::instruction_size; ++ address last = code()->last_insn(); ++ ++ if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) { ++ NativeMembar *bar = NativeMembar_at(prev); ++ // We are merging two memory barrier instructions. On RISCV we ++ // can do this simply by ORing them together. ++ bar->set_kind(bar->get_kind() | order_constraint); ++ BLOCK_COMMENT("merged membar"); ++ } else { ++ code()->set_last_insn(pc()); ++ ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; ++ ++ membar_mask_to_pred_succ(order_constraint, predecessor, successor); ++ fence(predecessor, successor); ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. ++void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { ++ srli(tmp2, thread, os::get_serialize_page_shift_count()); ++ ++ int mask = os::vm_page_size() - sizeof(int); ++ andi(tmp2, tmp2, mask, tmp1); ++ ++ add(tmp1, tmp2, (intptr_t)os::get_memory_serialize_page()); ++ membar(MacroAssembler::AnyAny); ++ sw(zr, Address(tmp1)); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ int32_t offset = 0; ++ la_patchable(t0, ExternalAddress(SafepointSynchronize::address_of_state()), offset); ++ lwu(t0, Address(t0, offset)); ++ assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); ++ bnez(t0, slow_path); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. 
++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ membar(MacroAssembler::AnyAny); ++ ld(t1, Address(xthread, Thread::polling_page_offset())); ++ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ andi(t0, t1, SafepointMechanism::poll_bit()); ++ bnez(t0, slow_path); ++ } else { ++ safepoint_poll(slow_path); ++ } ++} ++ ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ // oldv holds comparison value ++ // newv holds value to write in exchange ++ // addr identifies memory word to compare against/update ++ Label retry_load, nope; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ // and fail if it is not what we expect ++ lr_d(tmp, addr, Assembler::aqrl); ++ bne(tmp, oldv, nope); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_d(tmp, newv, addr, Assembler::rl); ++ beqz(tmp, succeed); ++ // retry so we only ever return after a load fails to compare ++ // ensures we don't return a stale value after a failed write. ++ j(retry_load); ++ // if the memory word differs we return it in oldv and signal a fail ++ bind(nope); ++ membar(AnyAny); ++ mv(oldv, tmp); ++ if (fail != NULL) { ++ j(*fail); ++ } ++} ++ ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); ++ cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); ++} ++ ++void MacroAssembler::load_reserved(Register addr, ++ enum operand_size size, ++ Assembler::Aqrl acquire) { ++ switch (size) { ++ case int64: ++ lr_d(t0, addr, acquire); ++ break; ++ case int32: ++ lr_w(t0, addr, acquire); ++ break; ++ case uint32: ++ lr_w(t0, addr, acquire); ++ zero_extend(t0, t0, 32); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_conditional(Register addr, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl release) { ++ switch (size) { ++ case int64: ++ sc_d(t0, new_val, addr, release); ++ break; ++ case int32: ++ case uint32: ++ sc_w(t0, new_val, addr, release); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3) { ++ assert(size == int8 || size == int16, "unsupported operand size"); ++ ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3; ++ ++ andi(shift, addr, 3); ++ slli(shift, shift, 3); ++ ++ andi(aligned_addr, addr, ~3); ++ ++ if (size == int8) { ++ mv(mask, 0xff); ++ } else { ++ mv(mask, -1); ++ zero_extend(mask, mask, 16); ++ } ++ sll(mask, mask, shift); ++ ++ xori(not_mask, mask, -1); ++ ++ sll(expected, expected, shift); ++ andr(expected, expected, mask); ++ ++ sll(new_val, new_val, shift); ++ andr(new_val, new_val, mask); ++} ++ ++// cmpxchg_narrow_value will 
kill t0, t1, expected, new_val and tmps. ++// It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w, ++// which are forced to work with 4-byte aligned address. ++void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ ++ Label retry, fail, done; ++ ++ bind(retry); ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); ++ ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, retry); ++ ++ if (result_as_bool) { ++ mv(result, 1); ++ j(done); ++ ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); ++ } else { ++ andr(tmp, old, mask); ++ ++ bind(fail); ++ srl(result, tmp, shift); ++ } ++ ++ if (size == int8) { ++ sign_extend(result, result, 8); ++ } else if (size == int16) { ++ sign_extend(result, result, 16); ++ } ++} ++ ++// weak cmpxchg narrow value will kill t0, t1, expected, new_val and tmps. ++// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement ++// the weak CAS stuff. The major difference is that it just failed when store conditional ++// failed. ++void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3) { ++ Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0; ++ assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp); ++ cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3); ++ ++ Label fail, done; ++ ++ lr_w(old, aligned_addr, acquire); ++ andr(tmp, old, mask); ++ bne(tmp, expected, fail); ++ ++ andr(tmp, old, not_mask); ++ orr(tmp, tmp, new_val); ++ sc_w(tmp, tmp, aligned_addr, release); ++ bnez(tmp, fail); ++ ++ // Success ++ mv(result, 1); ++ j(done); ++ ++ // Fail ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool) { ++ assert(size != int8 && size != int16, "unsupported operand size"); ++ ++ Label retry_load, done, ne_done; ++ bind(retry_load); ++ load_reserved(addr, size, acquire); ++ bne(t0, expected, ne_done); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, retry_load); ++ ++ // equal, succeed ++ if (result_as_bool) { ++ mv(result, 1); ++ } else { ++ mv(result, expected); ++ } ++ j(done); ++ ++ // not equal, failed ++ bind(ne_done); ++ if (result_as_bool) { ++ mv(result, zr); ++ } else { ++ mv(result, t0); ++ } ++ ++ bind(done); ++} ++ ++void MacroAssembler::cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result) { ++ assert(size != int8 && size != int16, "unsupported operand size"); ++ ++ Label fail, done; ++ load_reserved(addr, size, 
acquire); ++ bne(t0, expected, fail); ++ store_conditional(addr, new_val, size, release); ++ bnez(t0, fail); ++ ++ // Success ++ mv(result, 1); ++ j(done); ++ ++ // Fail ++ bind(fail); ++ mv(result, zr); ++ ++ bind(done); ++} ++ ++#define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ if (incr.is_register()) { \ ++ AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } else { \ ++ mv(t0, incr.as_constant()); \ ++ AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ } \ ++ return; \ ++} ++ ++ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl) ++ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl) ++ ++#undef ATOMIC_OP ++ ++#define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE) \ ++void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \ ++ prev = prev->is_valid() ? prev : zr; \ ++ AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE)); \ ++ return; \ ++} ++ ++ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed) ++ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl) ++ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl) ++ ++#undef ATOMIC_XCHG ++ ++#define ATOMIC_XCHGU(OP1, OP2) \ ++void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) { \ ++ atomic_##OP2(prev, newv, addr); \ ++ zero_extend(prev, prev, 32); \ ++ return; \ ++} ++ ++ATOMIC_XCHGU(xchgwu, xchgw) ++ATOMIC_XCHGU(xchgalwu, xchgalw) ++ ++#undef ATOMIC_XCHGU ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. 
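++ // Sketch of the test below: the low three bits of the mark word hold the lock
++ // state and the biased flag, and biased_lock_pattern (0b101) marks a biased header, so
++ //   (mark & biased_lock_mask_in_place) == biased_lock_pattern  ->  branch to done.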
++ ld(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); // 1 << 3 ++ sub(tmp_reg, tmp_reg, markOopDesc::biased_lock_pattern); ++ if (flag->is_valid()) { mv(flag, tmp_reg); } ++ beqz(tmp_reg, done); ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters, ++ Register flag) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) { ++ counters = BiasedLocking::counters(); ++ } ++ ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, t0, flag); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld(swap_reg, mark_addr); ++ } ++ andi(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); ++ xori(t0, tmp_reg, markOopDesc::biased_lock_pattern); ++ bnez(t0, cas_label); // don't care flag unless jumping to done ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, tmp_reg, xthread); ++ xorr(tmp_reg, swap_reg, tmp_reg); ++ andi(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); ++ if (flag->is_valid()) { ++ mv(flag, tmp_reg); ++ } ++ ++ if (counters != NULL) { ++ Label around; ++ bnez(tmp_reg, around); ++ atomic_incw(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, t0); ++ j(done); ++ bind(around); ++ } else { ++ beqz(tmp_reg, done); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ andi(t0, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ bnez(t0, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. 
Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ andi(t0, tmp_reg, markOopDesc::epoch_mask_in_place); ++ bnez(t0, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ { ++ Label cas_success; ++ Label counter; ++ mv(t0, (int64_t)(markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place)); ++ andr(swap_reg, swap_reg, t0); ++ orr(tmp_reg, swap_reg, xthread); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->anonymously_biased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success; ++ Label counter; ++ load_prototype_header(tmp_reg, obj_reg); ++ orr(tmp_reg, xthread, tmp_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, slow_case); ++ // cas failed here if slow_cass == NULL ++ if (flag->is_valid()) { ++ mv(flag, 1); ++ j(counter); ++ } ++ ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ bind(cas_success); ++ if (flag->is_valid()) { ++ mv(flag, 0); ++ bind(counter); ++ } ++ ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->rebiased_lock_entry_count_addr()), ++ tmp_reg, t0); ++ } ++ } ++ j(done); ++ ++ // don't care flag unless jumping to done ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. 
++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ { ++ Label cas_success, nope; ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, t0, cas_success, &nope); ++ bind(cas_success); ++ ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ if (counters != NULL) { ++ atomic_incw(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, ++ t0); ++ } ++ bind(nope); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) { ++ Label retry_load; ++ bind(retry_load); ++ // flush and load exclusive from the memory location ++ lr_w(tmp, counter_addr); ++ addw(tmp, tmp, 1); ++ // if we store+flush with no intervening write tmp wil be zero ++ sc_w(tmp, tmp, counter_addr); ++ bnez(tmp, retry_load); ++} ++ ++void MacroAssembler::far_jump(Address entry, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ jalr(x0, tmp, offset); ++ } else { ++ j(entry); ++ } ++} ++ ++void MacroAssembler::far_call(Address entry, Register tmp) { ++ assert(ReservedCodeCacheSize < 4*G, "branch out of range"); ++ assert(CodeCache::find_blob(entry.target()) != NULL, ++ "destination of far call not found in code cache"); ++ int32_t offset = 0; ++ if (far_branches()) { ++ // We can use auipc + jalr here because we know that the total size of ++ // the code cache cannot exceed 2Gb. ++ la_patchable(tmp, entry, offset); ++ jalr(x1, tmp, offset); // link ++ } else { ++ jal(entry); // link ++ } ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, tmp_reg); ++ bool must_load_sco = (super_check_offset == noreg); ++ if (must_load_sco) { ++ assert(tmp_reg != noreg, "supply either a tmp or a register offset"); ++ } else { ++ assert_different_registers(sub_klass, super_klass, super_check_offset); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); ++ ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else j(label) /*omit semi*/ ++ ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. 
Otherwise, each such
++  // type would need its own customized SSA.
++  // We move this check to the front of the fast path because many
++  // type checks are in fact trivially successful in this manner,
++  // so we get a nicely predicted branch right at the start of the check.
++  beq(sub_klass, super_klass, *L_success);
++
++  // Check the supertype display:
++  if (must_load_sco) {
++    lwu(tmp_reg, super_check_offset_addr);
++    super_check_offset = tmp_reg;
++  }
++  add(t0, sub_klass, super_check_offset);
++  Address super_check_addr(t0);
++  ld(t0, super_check_addr); // load displayed supertype
++
++  // This check has worked decisively for primary supers.
++  // Secondary supers are sought in the super_cache ('super_cache_addr').
++  // (Secondary supers are interfaces and very deeply nested subtypes.)
++  // This works in the same check above because of a tricky aliasing
++  // between the super_cache and the primary super display elements.
++  // (The 'super_check_addr' can address either, as the case requires.)
++  // Note that the cache is updated below if it does not help us find
++  // what we need immediately.
++  // So if it was a primary super, we can just fail immediately.
++  // Otherwise, it's the slow path for us (no success at this point).
++
++  beq(super_klass, t0, *L_success);
++  mv(t1, sc_offset);
++  if (L_failure == &L_fallthrough) {
++    beq(super_check_offset, t1, *L_slow_path);
++  } else {
++    bne(super_check_offset, t1, *L_failure, /* is_far */ true);
++    final_jmp(*L_slow_path);
++  }
++
++  bind(L_fallthrough);
++
++#undef final_jmp
++}
++
++// Scans count pointer sized words at [addr] for an occurrence of value,
++// generic
++void MacroAssembler::repne_scan(Register addr, Register value, Register count,
++                                Register tmp) {
++  Label Lloop, Lexit;
++  beqz(count, Lexit);
++  bind(Lloop);
++  ld(tmp, addr);
++  beq(value, tmp, Lexit);
++  add(addr, addr, wordSize);
++  sub(count, count, 1);
++  bnez(count, Lloop);
++  bind(Lexit);
++}
++
++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
++                                                   Register super_klass,
++                                                   Register tmp_reg,
++                                                   Register tmp2_reg,
++                                                   Label* L_success,
++                                                   Label* L_failure) {
++  assert_different_registers(sub_klass, super_klass, tmp_reg);
++  if (tmp2_reg != noreg) {
++    assert_different_registers(sub_klass, super_klass, tmp_reg, tmp2_reg, t0);
++  }
++#define IS_A_TEMP(reg) ((reg) == tmp_reg || (reg) == tmp2_reg)
++
++  Label L_fallthrough;
++  int label_nulls = 0;
++  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
++  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
++
++  assert(label_nulls <= 1, "at most one NULL in the batch");
++
++  // A couple of useful fields in sub_klass:
++  int ss_offset = in_bytes(Klass::secondary_supers_offset());
++  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
++  Address secondary_supers_addr(sub_klass, ss_offset);
++  Address super_cache_addr(     sub_klass, sc_offset);
++
++  BLOCK_COMMENT("check_klass_subtype_slow_path");
++
++  // Do a linear scan of the secondary super-klass chain.
++  // This code is rarely used, so simplicity is a virtue here.
++  // The repne_scan instruction uses fixed registers, which we must spill.
++  // Don't worry too much about pre-existing connections with the input regs.
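++  // In effect the code below does (sketch only):
++  //   Array<Klass*>* ss = sub_klass->secondary_supers();
++  //   for (int i = 0; i < ss->length(); i++)
++  //     if (ss->at(i) == super_klass) { cache it in secondary_super_cache; success; }
++  //   -> L_failure otherwise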
++ ++ assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super) ++ assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter) ++ ++ RegSet pushed_registers; ++ if (!IS_A_TEMP(x12)) { ++ pushed_registers += x12; ++ } ++ if (!IS_A_TEMP(x15)) { ++ pushed_registers += x15; ++ } ++ ++ if (super_klass != x10 || UseCompressedOops) { ++ if (!IS_A_TEMP(x10)) { ++ pushed_registers += x10; ++ } ++ } ++ ++ push_reg(pushed_registers, sp); ++ ++ // Get super_klass value into x10 (even if it was in x15 or x12) ++ mv(x10, super_klass); ++ ++#ifndef PRODUCT ++ mv(t1, (address)&SharedRuntime::_partial_subtype_ctr); ++ Address pst_counter_addr(t1); ++ ld(t0, pst_counter_addr); ++ add(t0, t0, 1); ++ sd(t0, pst_counter_addr); ++#endif // PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(x15, secondary_supers_addr); ++ // Load the array length. ++ lwu(x12, Address(x15, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ add(x15, x15, Array::base_offset_in_bytes()); ++ ++ // Set t0 to an obvious invalid value, falling through by default ++ mv(t0, -1); ++ // Scan X12 words at [X15] for an occurrence of X10. ++ repne_scan(x15, x10, x12, t0); ++ ++ // pop will restore x10, so we should use a temp register to keep its value ++ mv(t1, x10); ++ ++ // Unspill the temp. registers: ++ pop_reg(pushed_registers, sp); ++ ++ bne(t1, t0, *L_failure); ++ ++ // Success. Cache the super we found an proceed in triumph. ++ sd(super_klass, super_cache_addr); ++ ++ if (L_success != &L_fallthrough) { ++ j(*L_success); ++ } ++ ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Register tmp2, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register tmp1, ++ Label& slow_case, ++ bool is_far) { ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far); ++} ++ ++ ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to Thread::current() or, indeed, the call setup code ++void MacroAssembler::get_thread(Register thread) { ++ // save all call-clobbered regs except thread ++ RegSet saved_regs = RegSet::of(x10) + ra - thread; ++ push_reg(saved_regs, sp); ++ ++ mv(ra, CAST_FROM_FN_PTR(address, Thread::current)); ++ jalr(ra); ++ if (thread != c_rarg0) { ++ mv(thread, c_rarg0); ++ } ++ ++ // restore pushed registers ++ pop_reg(saved_regs, sp); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ jbyte *byte_map_base = ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); ++ mv(reg, (uint64_t)byte_map_base); ++} ++ ++void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) { ++ relocInfo::relocType rtype = dest.rspec().reloc()->type(); ++ unsigned long low_address = (uintptr_t)CodeCache::low_bound(); ++ unsigned long high_address = 
(uintptr_t)CodeCache::high_bound(); ++ unsigned long dest_address = (uintptr_t)dest.target(); ++ long offset_low = dest_address - low_address; ++ long offset_high = dest_address - high_address; ++ ++ assert(is_valid_riscv64_address(dest.target()), "bad address"); ++ assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address"); ++ ++ code_section()->relocate(pc(), dest.rspec()); ++ // RISC-V doesn't compute a page-aligned address, in order to partially ++ // compensate for the use of *signed* offsets in its base+disp12 ++ // addressing mode (RISC-V's PC-relative reach remains asymmetric ++ // [-(2G + 2K), 2G - 2K)). ++ if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) { ++ int64_t distance = dest.target() - pc(); ++ auipc(reg1, (int32_t)distance + 0x800); ++ offset = ((int32_t)distance << 20) >> 20; ++ } else { ++ movptr_with_offset(reg1, dest.target(), offset); ++ } ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ assert(framesize > 0, "framesize must be > 0"); ++ sub(sp, sp, framesize); ++ sd(fp, Address(sp, framesize - 2 * wordSize)); ++ sd(ra, Address(sp, framesize - wordSize)); ++ if (PreserveFramePointer) { add(fp, sp, framesize); } ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ assert(framesize > 0, "framesize must be > 0"); ++ ld(fp, Address(sp, framesize - 2 * wordSize)); ++ ld(ra, Address(sp, framesize - wordSize)); ++ add(sp, sp, framesize); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset())); ++ bltu(sp, t0, no_reserved_zone_enabling); ++ ++ enter(); // RA and FP are live. ++ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset); ++ jalr(x1, t0, offset); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ offset = 0; ++ la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset); ++ jalr(x0, t0, offset); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++// Move the address of the polling page into dest. ++void MacroAssembler::get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(dest, Address(xthread, Thread::polling_page_offset())); ++ } else { ++ uint64_t align = (uint64_t)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ la_patchable(dest, Address(page, rtype), offset); ++ } ++} ++ ++// Move the address of the polling page into dest. ++void MacroAssembler::read_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ int32_t offset = 0; ++ get_polling_page(dest, page, offset, rtype); ++ read_polling_page(dest, offset, rtype); ++} ++ ++// Read the polling page. The address of the polling page must ++// already be in r. 
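++// Sketch of the mechanism: the lwu into zr emitted below discards its result; the
++// load only exists so the VM can protect the polling page and turn the access into
++// a SIGSEGV that the signal handler maps back to a safepoint stop.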
++void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) { ++ code_section()->relocate(pc(), rtype); ++ lwu(zr, Address(r, offset)); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++#ifdef ASSERT ++ { ++ ThreadInVMfromUnknown tiv; ++ assert (UseCompressedOops, "should only be used for compressed oops"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); ++ } ++#endif ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ code_section()->relocate(pc(), rspec); ++ li32(dst, 0xDEADBEEF); ++ zero_extend(dst, dst, 32); ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int index = oop_recorder()->find_index(k); ++ assert(!Universe::heap()->is_in_reserved(k), "should not be an oop"); ++ ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ code_section()->relocate(pc(), rspec); ++ narrowKlass nk = Klass::encode_klass(k); ++ li32(dst, nk); ++ zero_extend(dst, dst, 32); ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++address MacroAssembler::trampoline_call(Address entry) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type || ++ entry.rspec().type() == relocInfo::opt_virtual_call_type || ++ entry.rspec().type() == relocInfo::static_call_type || ++ entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ // We need a trampoline if branches are far. ++ if (far_branches()) { ++ bool in_scratch_emit_size = false; ++#ifdef COMPILER2 ++ // We don't want to emit a trampoline if C2 is generating dummy ++ // code during its branch shortening phase. ++ CompileTask* task = ciEnv::current()->task(); ++ in_scratch_emit_size = ++ (task != NULL && is_c2_compile(task->comp_level()) && ++ Compile::current()->in_scratch_emit_size()); ++#endif ++ if (!in_scratch_emit_size) { ++ address stub = emit_trampoline_stub(offset(), entry.target()); ++ if (stub == NULL) { ++ postcond(pc() == badAddress); ++ return NULL; // CodeCache is full ++ } ++ } ++ } ++ ++ address call_pc = pc(); ++ relocate(entry.rspec()); ++ if (!far_branches()) { ++ jal(entry.target()); ++ } else { ++ jal(pc()); ++ } ++ ++ postcond(pc() != badAddress); ++ return call_pc; ++} ++ ++address MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ movptr(t1, (address)Universe::non_oop_word()); ++ assert_cond(entry != NULL); ++ return trampoline_call(Address(entry, rh)); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (RA still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. 
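++ // Stub layout emitted below: auipc+ld (2 instructions) to pick up the 64-bit destination stored at data_offset, a jalr through t0, then the destination word itself.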
++ address stub = start_a_stub(NativeInstruction::instruction_size + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. ++ ++ // make sure 4 byte aligned here, so that the destination address would be ++ // 8 byte aligned after 3 intructions ++ while (offset() % wordSize == 0) { nop(); } ++ ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + ++ insts_call_instruction_offset)); ++ const int stub_start_offset = offset(); ++ ++ // Now, create the trampoline stub's code: ++ // - load the call ++ // - call ++ Label target; ++ ld(t0, target); // auipc + ld ++ jr(t0); // jalr ++ bind(target); ++ assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, ++ "should be"); ++ emit_int64((intptr_t)dest); ++ ++ const address stub_start_addr = addr_at(stub_start_offset); ++ ++ assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); ++ ++ end_a_stub(); ++ return stub_start_addr; ++} ++ ++Address MacroAssembler::add_memory_helper(const Address dst) { ++ switch (dst.getMode()) { ++ case Address::base_plus_offset: ++ // This is the expected mode, although we allow all the other ++ // forms below. ++ return form_address(dst.base(), dst.offset(), 12, t1); ++ default: ++ la(t1, dst); ++ return Address(t1); ++ } ++} ++ ++void MacroAssembler::increment(const Address dst, int64_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address increment"); ++ ld(t0, adr); ++ add(t0, t0, value, t1); ++ sd(t0, adr); ++} ++ ++void MacroAssembler::incrementw(const Address dst, int32_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address increment"); ++ lwu(t0, adr); ++ addw(t0, t0, value, t1); ++ sw(t0, adr); ++} ++ ++void MacroAssembler::decrement(const Address dst, int64_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ ld(t0, adr); ++ sub(t0, t0, value, t1); ++ sd(t0, adr); ++} ++ ++void MacroAssembler::decrementw(const Address dst, int32_t value) { ++ assert(((dst.getMode() == Address::base_plus_offset && ++ is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)), ++ "invalid value and address mode combination"); ++ Address adr = add_memory_helper(dst); ++ assert(!adr.uses(t0), "invalid dst for address decrement"); ++ lwu(t0, adr); ++ subw(t0, t0, value, t1); ++ sw(t0, adr); ++} ++ ++void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) { ++ assert_different_registers(src1, t0); ++ int32_t offset; ++ la_patchable(t0, src2, offset); ++ ld(t0, Address(t0, offset)); ++ beq(src1, t0, equal); ++} ++ ++void MacroAssembler::oop_equal(Register obj1, Register obj2, Label& equal, 
bool is_far) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->obj_equals(this, obj1, obj2, equal, is_far); ++} ++ ++void MacroAssembler::oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->obj_nequals(this, obj1, obj2, nequal, is_far); ++} ++ ++#ifdef COMPILER2 ++// Set dst NaN if either source is NaN. ++void MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, ++ bool is_double, bool is_min) { ++ assert_different_registers(dst, src1, src2); ++ Label Ldone; ++ fsflags(zr); ++ if (is_double) { ++ if (is_min) { ++ fmin_d(dst, src1, src2); ++ } else { ++ fmax_d(dst, src1, src2); ++ } ++ // flt is just used for set fflag NV ++ flt_d(zr, src1, src2); ++ } else { ++ if (is_min) { ++ fmin_s(dst, src1, src2); ++ } else { ++ fmax_s(dst, src1, src2); ++ } ++ // flt is just used for set fflag NV ++ flt_s(zr, src1, src2); ++ } ++ frflags(t0); ++ beqz(t0, Ldone); ++ ++ // Src1 or src2 must be NaN here. Set dst NaN. ++ if (is_double) { ++ fadd_d(dst, src1, src2); ++ } else { ++ fadd_s(dst, src1, src2); ++ } ++ bind(Ldone); ++} ++ ++address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE, SAME, NEXT_DWORD, SHORT, TAIL, TAIL2, IS_TMP5_ZR; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; // cnt2 only used in array length compare ++ Register elem_per_word = tmp6; ++ int log_elem_size = exact_log2(elem_size); ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ ++ assert(elem_size == 1 || elem_size == 2, "must be char or byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1, tmp3, tmp4, tmp5, tmp6); ++ mv(elem_per_word, wordSize / elem_size); ++ ++ BLOCK_COMMENT("arrays_equals {"); ++ ++ // if (a1 == a2), return true ++ oop_equal(a1, a2, SAME); ++ ++ mv(result, false); ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt2, cnt1, DONE); ++ beqz(cnt1, SAME); ++ ++ slli(tmp5, cnt1, 3 + log_elem_size); ++ sub(tmp5, zr, tmp5); ++ add(a1, a1, base_offset); ++ add(a2, a2, base_offset); ++ ld(tmp3, Address(a1, 0)); ++ ld(tmp4, Address(a2, 0)); ++ ble(cnt1, elem_per_word, SHORT); // short or same ++ ++ // Main 16 byte comparison loop with 2 exits ++ bind(NEXT_DWORD); { ++ ld(tmp1, Address(a1, wordSize)); ++ ld(tmp2, Address(a2, wordSize)); ++ sub(cnt1, cnt1, 2 * wordSize / elem_size); ++ blez(cnt1, TAIL); ++ bne(tmp3, tmp4, DONE); ++ ld(tmp3, Address(a1, 2 * wordSize)); ++ ld(tmp4, Address(a2, 2 * wordSize)); ++ add(a1, a1, 2 * wordSize); ++ add(a2, a2, 2 * wordSize); ++ ble(cnt1, elem_per_word, TAIL2); ++ } beq(tmp1, tmp2, NEXT_DWORD); ++ j(DONE); ++ ++ bind(TAIL); ++ xorr(tmp4, tmp3, tmp4); ++ xorr(tmp2, tmp1, tmp2); ++ sll(tmp2, tmp2, tmp5); ++ orr(tmp5, tmp4, tmp2); ++ j(IS_TMP5_ZR); ++ ++ bind(TAIL2); ++ bne(tmp1, tmp2, DONE); ++ ++ bind(SHORT); ++ xorr(tmp4, tmp3, tmp4); ++ sll(tmp5, tmp4, tmp5); ++ ++ bind(IS_TMP5_ZR); ++ bnez(tmp5, DONE); ++ ++ bind(SAME); ++ mv(result, true); ++ // That's it. 
++ bind(DONE); ++ ++ BLOCK_COMMENT("} array_equals"); ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ Label SAME, DONE, SHORT, NEXT_WORD; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ assert(elem_size == 1 || elem_size == 2, "must be 2 or 1 byte"); ++ assert_different_registers(a1, a2, result, cnt1, t0, t1); ++ ++ BLOCK_COMMENT("string_equals {"); ++ ++ beqz(cnt1, SAME); ++ mv(result, false); ++ ++ // Check for short strings, i.e. smaller than wordSize. ++ sub(cnt1, cnt1, wordSize); ++ blez(cnt1, SHORT); ++ ++ // Main 8 byte comparison loop. ++ bind(NEXT_WORD); { ++ ld(tmp1, Address(a1, 0)); ++ add(a1, a1, wordSize); ++ ld(tmp2, Address(a2, 0)); ++ add(a2, a2, wordSize); ++ sub(cnt1, cnt1, wordSize); ++ bne(tmp1, tmp2, DONE); ++ } bgtz(cnt1, NEXT_WORD); ++ ++ if (!AvoidUnalignedAccesses) { ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when ++ // length == 4. ++ add(tmp1, a1, cnt1); ++ ld(tmp1, Address(tmp1, 0)); ++ add(tmp2, a2, cnt1); ++ ld(tmp2, Address(tmp2, 0)); ++ bne(tmp1, tmp2, DONE); ++ j(SAME); ++ } ++ ++ bind(SHORT); ++ ld(tmp1, Address(a1)); ++ ld(tmp2, Address(a2)); ++ xorr(tmp1, tmp1, tmp2); ++ neg(cnt1, cnt1); ++ slli(cnt1, cnt1, LogBitsPerByte); ++ sll(tmp1, tmp1, cnt1); ++ bnez(tmp1, DONE); ++ ++ // Arrays are equal. ++ bind(SAME); ++ mv(result, true); ++ ++ // That's it. ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals"); ++} ++ ++typedef void (MacroAssembler::*load_chr_insn)(Register Rd, const Address &adr, Register temp); ++ ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ Register tmp3, int ae) ++{ ++ Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB, ++ DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT, ++ SHORT_LOOP_START, TAIL_CHECK, L; ++ ++ const int STUB_THRESHOLD = 64 + 8; ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ // for L strings, 1 byte for 1 character ++ // for U strings, 2 bytes for 1 character ++ int str1_chr_size = str1_isL ? 1 : 2; ++ int str2_chr_size = str2_isL ? 1 : 2; ++ int minCharsInWord = isLL ? wordSize : wordSize / 2; ++ ++ load_chr_insn str1_load_chr = str1_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn str2_load_chr = str2_isL ? (load_chr_insn)&MacroAssembler::lbu : (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // Bizzarely, the counts are passed in bytes, regardless of whether they ++ // are L or U strings, however the result is always in characters. 
++ if (!str1_isL) { ++ sraiw(cnt1, cnt1, 1); ++ } ++ if (!str2_isL) { ++ sraiw(cnt2, cnt2, 1); ++ } ++ ++ // Compute the minimum of the string lengths and save the difference in result. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ // A very short string ++ mv(t0, minCharsInWord); ++ ble(cnt2, t0, SHORT_STRING); ++ ++ // Compare longwords ++ // load first parts of strings and finish initialization while loading ++ { ++ if (str1_isL == str2_isL) { // LL or UU ++ // check if str1 and str2 are same string ++ beq(str1, str2, DONE); ++ // load 8 bytes once to compare ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ sub(cnt2, cnt2, minCharsInWord); ++ beqz(cnt2, TAIL_CHECK); ++ // convert cnt2 from characters to bytes ++ if(!str1_isL) { ++ slli(cnt2, cnt2, 1); ++ } ++ add(str2, str2, cnt2); ++ add(str1, str1, cnt2); ++ sub(cnt2, zr, cnt2); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ add(str1, str1, cnt2); ++ sub(cnt1, zr, cnt2); ++ slli(cnt2, cnt2, 1); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 4); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ mv(t0, STUB_THRESHOLD); ++ bge(cnt2, t0, STUB); ++ addi(cnt2, cnt2, -4); ++ slli(t0, cnt2, 1); ++ sub(cnt1, zr, t0); ++ add(str1, str1, t0); ++ add(str2, str2, cnt2); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ sub(cnt2, zr, cnt2); ++ addi(cnt1, cnt1, 8); ++ } ++ addi(cnt2, cnt2, isUL ? 4 : 8); ++ bgez(cnt2, TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ ++ // main loop ++ bind(NEXT_WORD); ++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt2, cnt2, 8); ++ } else if (isLU) { // LU case ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ addi(cnt1, cnt1, 4); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ addi(cnt2, cnt2, 8); ++ } else { // UL case ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ addi(cnt1, cnt1, 8); ++ addi(cnt2, cnt2, 4); ++ } ++ bgez(cnt2, TAIL); ++ ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, NEXT_WORD); ++ j(DIFFERENCE); ++ bind(TAIL); ++ xorr(tmp3, tmp1, tmp2); ++ bnez(tmp3, DIFFERENCE); ++ // Last longword. ++ if (AvoidUnalignedAccesses) { ++ // Aligned access. Load bytes from byte-aligned address, ++ // which may contain invalid bytes when remaining bytes is ++ // less than 4(UL/LU) or 8 (LL/UU). ++ // Invalid bytes should be removed before comparison. 
++ if (str1_isL == str2_isL) { // LL or UU ++ add(t0, str1, cnt2); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ } else if (isLU) { // LU ++ add(t0, str1, cnt1); ++ lwu(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ ld(tmp2, Address(t0)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL ++ add(t0, str1, cnt1); ++ ld(tmp1, Address(t0)); ++ add(t0, str2, cnt2); ++ lwu(tmp2, Address(t0)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ slli(cnt2, cnt2, 1); // UL case should convert cnt2 to bytes ++ } ++ // remove invalid bytes ++ slli(t0, cnt2, LogBitsPerByte); ++ sll(tmp1, tmp1, t0); ++ sll(tmp2, tmp2, t0); ++ } else { ++ // Last longword. In the case where length == 4 we compare the ++ // same longword twice, but that's still faster than another ++ // conditional branch. ++ if (str1_isL == str2_isL) { // LL or UU ++ ld(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ } else if (isLU) { // LU case ++ lwu(tmp1, Address(str1)); ++ ld(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp1); ++ mv(tmp1, tmp3); ++ } else { // UL case ++ ld(tmp1, Address(str1)); ++ lwu(tmp2, Address(str2)); ++ inflate_lo32(tmp3, tmp2); ++ mv(tmp2, tmp3); ++ } ++ } ++ bind(TAIL_CHECK); ++ xorr(tmp3, tmp1, tmp2); ++ beqz(tmp3, DONE); ++ ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ bind(DIFFERENCE); ++ ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb ++ srl(tmp1, tmp1, result); ++ srl(tmp2, tmp2, result); ++ if (isLL) { ++ andi(tmp1, tmp1, 0xFF); ++ andi(tmp2, tmp2, 0xFF); ++ } else { ++ andi(tmp1, tmp1, 0xFFFF); ++ andi(tmp2, tmp2, 0xFFFF); ++ } ++ sub(result, tmp1, tmp2); ++ j(DONE); ++ } ++ ++ bind(STUB); ++ RuntimeAddress stub = NULL; ++ switch (ae) { ++ case StrIntrinsicNode::LL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); ++ break; ++ case StrIntrinsicNode::UU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); ++ break; ++ case StrIntrinsicNode::LU: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); ++ break; ++ case StrIntrinsicNode::UL: ++ stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ assert(stub.target() != NULL, "compare_long_string stub has not been generated"); ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(SHORT_STRING); ++ // Is the minimum length zero? 
++ beqz(cnt2, DONE); ++ // arrange code to do most branches while loading and loading next characters ++ // while comparing previous ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ j(SHORT_LOOP_START); ++ bind(SHORT_LOOP); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST); ++ bind(SHORT_LOOP_START); ++ (this->*str1_load_chr)(tmp2, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(t0, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bne(tmp1, cnt1, SHORT_LOOP_TAIL); ++ addi(cnt2, cnt2, -1); ++ beqz(cnt2, SHORT_LAST2); ++ (this->*str1_load_chr)(tmp1, Address(str1), t0); ++ addi(str1, str1, str1_chr_size); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ beq(tmp2, t0, SHORT_LOOP); ++ sub(result, tmp2, t0); ++ j(DONE); ++ bind(SHORT_LOOP_TAIL); ++ sub(result, tmp1, cnt1); ++ j(DONE); ++ bind(SHORT_LAST2); ++ beq(tmp2, t0, DONE); ++ sub(result, tmp2, t0); ++ ++ j(DONE); ++ bind(SHORT_LAST_INIT); ++ (this->*str2_load_chr)(cnt1, Address(str2), t0); ++ addi(str2, str2, str2_chr_size); ++ bind(SHORT_LAST); ++ beq(tmp1, cnt1, DONE); ++ sub(result, tmp1, cnt1); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} string_compare"); ++} ++ ++// short string ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL) ++{ ++ Register ch1 = t0; ++ Register index = t1; ++ ++ BLOCK_COMMENT("string_indexof_char_short {"); ++ ++ Label LOOP, LOOP1, LOOP4, LOOP8; ++ Label MATCH, MATCH1, MATCH2, MATCH3, ++ MATCH4, MATCH5, MATCH6, MATCH7, NOMATCH; ++ ++ mv(result, -1); ++ mv(index, zr); ++ ++ bind(LOOP); ++ addi(t0, index, 8); ++ ble(t0, cnt1, LOOP8); ++ addi(t0, index, 4); ++ ble(t0, cnt1, LOOP4); ++ j(LOOP1); ++ ++ bind(LOOP8); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ isL ? lbu(ch1, Address(str1, 4)) : lhu(ch1, Address(str1, 8)); ++ beq(ch, ch1, MATCH4); ++ isL ? lbu(ch1, Address(str1, 5)) : lhu(ch1, Address(str1, 10)); ++ beq(ch, ch1, MATCH5); ++ isL ? lbu(ch1, Address(str1, 6)) : lhu(ch1, Address(str1, 12)); ++ beq(ch, ch1, MATCH6); ++ isL ? lbu(ch1, Address(str1, 7)) : lhu(ch1, Address(str1, 14)); ++ beq(ch, ch1, MATCH7); ++ addi(index, index, 8); ++ addi(str1, str1, isL ? 8 : 16); ++ blt(index, cnt1, LOOP); ++ j(NOMATCH); ++ ++ bind(LOOP4); ++ isL ? lbu(ch1, Address(str1, 0)) : lhu(ch1, Address(str1, 0)); ++ beq(ch, ch1, MATCH); ++ isL ? lbu(ch1, Address(str1, 1)) : lhu(ch1, Address(str1, 2)); ++ beq(ch, ch1, MATCH1); ++ isL ? lbu(ch1, Address(str1, 2)) : lhu(ch1, Address(str1, 4)); ++ beq(ch, ch1, MATCH2); ++ isL ? lbu(ch1, Address(str1, 3)) : lhu(ch1, Address(str1, 6)); ++ beq(ch, ch1, MATCH3); ++ addi(index, index, 4); ++ addi(str1, str1, isL ? 4 : 8); ++ bge(index, cnt1, NOMATCH); ++ ++ bind(LOOP1); ++ isL ? lbu(ch1, Address(str1)) : lhu(ch1, Address(str1)); ++ beq(ch, ch1, MATCH); ++ addi(index, index, 1); ++ addi(str1, str1, isL ? 
1 : 2); ++ blt(index, cnt1, LOOP1); ++ j(NOMATCH); ++ ++ bind(MATCH1); ++ addi(index, index, 1); ++ j(MATCH); ++ ++ bind(MATCH2); ++ addi(index, index, 2); ++ j(MATCH); ++ ++ bind(MATCH3); ++ addi(index, index, 3); ++ j(MATCH); ++ ++ bind(MATCH4); ++ addi(index, index, 4); ++ j(MATCH); ++ ++ bind(MATCH5); ++ addi(index, index, 5); ++ j(MATCH); ++ ++ bind(MATCH6); ++ addi(index, index, 6); ++ j(MATCH); ++ ++ bind(MATCH7); ++ addi(index, index, 7); ++ ++ bind(MATCH); ++ mv(result, index); ++ bind(NOMATCH); ++ BLOCK_COMMENT("} string_indexof_char_short"); ++} ++ ++// StringUTF16.indexOfChar ++// StringLatin1.indexOfChar ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL) ++{ ++ Label CH1_LOOP, HIT, NOMATCH, DONE, DO_LONG; ++ Register ch1 = t0; ++ Register orig_cnt = t1; ++ Register mask1 = tmp3; ++ Register mask2 = tmp2; ++ Register match_mask = tmp1; ++ Register trailing_char = tmp4; ++ Register unaligned_elems = tmp4; ++ ++ BLOCK_COMMENT("string_indexof_char {"); ++ beqz(cnt1, NOMATCH); ++ ++ addi(t0, cnt1, isL ? -32 : -16); ++ bgtz(t0, DO_LONG); ++ string_indexof_char_short(str1, cnt1, ch, result, isL); ++ j(DONE); ++ ++ bind(DO_LONG); ++ mv(orig_cnt, cnt1); ++ if (AvoidUnalignedAccesses) { ++ Label ALIGNED; ++ andi(unaligned_elems, str1, 0x7); ++ beqz(unaligned_elems, ALIGNED); ++ sub(unaligned_elems, unaligned_elems, 8); ++ neg(unaligned_elems, unaligned_elems); ++ if (!isL) { ++ srli(unaligned_elems, unaligned_elems, 1); ++ } ++ // do unaligned part per element ++ string_indexof_char_short(str1, unaligned_elems, ch, result, isL); ++ bgez(result, DONE); ++ mv(orig_cnt, cnt1); ++ sub(cnt1, cnt1, unaligned_elems); ++ bind(ALIGNED); ++ } ++ ++ // duplicate ch ++ if (isL) { ++ slli(ch1, ch, 8); ++ orr(ch, ch1, ch); ++ } ++ slli(ch1, ch, 16); ++ orr(ch, ch1, ch); ++ slli(ch1, ch, 32); ++ orr(ch, ch1, ch); ++ ++ if (!isL) { ++ slli(cnt1, cnt1, 1); ++ } ++ ++ mv(mask1, isL ? 0x0101010101010101 : 0x0001000100010001); ++ mv(mask2, isL ? 
0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ ++ bind(CH1_LOOP); ++ ld(ch1, Address(str1)); ++ addi(str1, str1, 8); ++ addi(cnt1, cnt1, -8); ++ compute_match_mask(ch1, ch, match_mask, mask1, mask2); ++ bnez(match_mask, HIT); ++ bgtz(cnt1, CH1_LOOP); ++ j(NOMATCH); ++ ++ bind(HIT); ++ ctzc_bit(trailing_char, match_mask, isL, ch1, result); ++ srli(trailing_char, trailing_char, 3); ++ addi(cnt1, cnt1, 8); ++ ble(cnt1, trailing_char, NOMATCH); ++ // match case ++ if (!isL) { ++ srli(cnt1, cnt1, 1); ++ srli(trailing_char, trailing_char, 1); ++ } ++ ++ sub(result, orig_cnt, cnt1); ++ add(result, result, trailing_char); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof_char"); ++} ++ ++// Search for needle in haystack and return index or -1 ++// x10: result ++// x11: haystack ++// x12: haystack_len ++// x13: needle ++// x14: needle_len ++void MacroAssembler::string_indexof(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae) ++{ ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Label LINEARSEARCH, LINEARSTUB, DONE, NOMATCH; ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register nlen_tmp = tmp1; // needle len tmp ++ Register hlen_tmp = tmp2; // haystack len tmp ++ Register result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ ++ BLOCK_COMMENT("string_indexof {"); ++ ++ // Note, inline_string_indexOf() generates checks: ++ // if (pattern.count > src.count) return -1; ++ // if (pattern.count == 0) return 0; ++ ++ // We have two strings, a source string in haystack, haystack_len and a pattern string ++ // in needle, needle_len. Find the first occurrence of pattern in source or return -1. ++ ++ // For larger pattern and source we use a simplified Boyer Moore algorithm. ++ // With a small pattern and source we use linear scan. ++ ++ // needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm. ++ sub(result_tmp, haystack_len, needle_len); ++ // needle_len < 8, use linear scan ++ sub(t0, needle_len, 8); ++ bltz(t0, LINEARSEARCH); ++ // needle_len >= 256, use linear scan ++ sub(t0, needle_len, 256); ++ bgez(t0, LINEARSTUB); ++ // needle_len >= haystack_len/4, use linear scan ++ srli(t0, haystack_len, 2); ++ bge(needle_len, t0, LINEARSTUB); ++ ++ // Boyer-Moore-Horspool introduction: ++ // The Boyer Moore algorithm is based on the description here:- ++ // ++ // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm ++ // ++ // This describes an algorithm with 2 shift rules. The 'Bad Character' rule ++ // and the 'Good Suffix' rule. ++ // ++ // These rules are essentially heuristics for how far we can shift the ++ // pattern along the search string.
++ // ++ // The implementation here uses the 'Bad Character' rule only because of the ++ // complexity of initialisation for the 'Good Suffix' rule. ++ // ++ // This is also known as the Boyer-Moore-Horspool algorithm: ++ // ++ // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm ++ // ++ // #define ASIZE 256 ++ // ++ // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { ++ // int i, j; ++ // unsigned c; ++ // unsigned char bc[ASIZE]; ++ // ++ // /* Preprocessing */ ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ // ++ // /* Searching */ ++ // j = 0; ++ // while (j <= n - m) { ++ // c = src[i+j]; ++ // if (pattern[m-1] == c) ++ // int k; ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // if (k < 0) return j; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 ++ // // LL case: (c< 256) always true. Remove branch ++ // j += bc[pattern[j+m-1]]; ++ // #endif ++ // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF ++ // // UU case: need if (c < ASIZE) check; skip by m if not. ++ // if (c < ASIZE) ++ // j += bc[pattern[j+m-1]]; ++ // else ++ // j += m ++ // #endif ++ // } ++ // return -1; ++ // } ++ ++ // temp registers: t0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, result ++ Label BCLOOP, BCSKIP, BMLOOPSTR2, BMLOOPSTR1, BMSKIP, BMADV, BMMATCH, ++ BMLOOPSTR1_LASTCMP, BMLOOPSTR1_CMP, BMLOOPSTR1_AFTER_LOAD, BM_INIT_LOOP; ++ ++ Register haystack_end = haystack_len; ++ Register skipch = tmp2; ++ ++ // pattern length is >=8, so, we can read at least 1 register for cases when ++ // UTF->Latin1 conversion is not needed (8 LL or 4 UU) and half register for ++ // UL case. We'll re-read last character in inner pre-loop code to have ++ // single outer pre-loop load ++ const int firstStep = isLL ?
7 : 3; ++ ++ const int ASIZE = 256; ++ const int STORE_BYTES = 8; // 8 bytes stored per instruction(sd) ++ ++ sub(sp, sp, ASIZE); ++ ++ // init BC offset table with default value: needle_len ++ slli(t0, needle_len, 8); ++ orr(t0, t0, needle_len); // [63...16][needle_len][needle_len] ++ slli(tmp1, t0, 16); ++ orr(t0, tmp1, t0); // [63...32][needle_len][needle_len][needle_len][needle_len] ++ slli(tmp1, t0, 32); ++ orr(tmp5, tmp1, t0); // tmp5: 8 elements [needle_len] ++ ++ mv(ch1, sp); // ch1 is t0 ++ mv(tmp6, ASIZE / STORE_BYTES); // loop iterations ++ ++ bind(BM_INIT_LOOP); ++ // for (i = 0; i < ASIZE; ++i) ++ // bc[i] = m; ++ for (int i = 0; i < 4; i++) { ++ sd(tmp5, Address(ch1, i * wordSize)); ++ } ++ add(ch1, ch1, 32); ++ sub(tmp6, tmp6, 4); ++ bgtz(tmp6, BM_INIT_LOOP); ++ ++ sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern ++ Register orig_haystack = tmp5; ++ mv(orig_haystack, haystack); ++ // result_tmp = tmp4 ++ shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift); ++ sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1 ++ mv(tmp3, needle); ++ ++ // for (i = 0; i < m - 1; ) { ++ // c = pattern[i]; ++ // ++i; ++ // // c < 256 for Latin1 string, so, no need for branch ++ // #ifdef PATTERN_STRING_IS_LATIN1 ++ // bc[c] = m - i; ++ // #else ++ // if (c < ASIZE) bc[c] = m - i; ++ // #endif ++ // } ++ bind(BCLOOP); ++ (this->*needle_load_1chr)(ch1, Address(tmp3), noreg); ++ add(tmp3, tmp3, needle_chr_size); ++ if (!needle_isL) { ++ // ae == StrIntrinsicNode::UU ++ mv(tmp6, ASIZE); ++ bgeu(ch1, tmp6, BCSKIP); ++ } ++ add(tmp4, sp, ch1); ++ sb(ch2, Address(tmp4)); // store skip offset to BC offset table ++ ++ bind(BCSKIP); ++ sub(ch2, ch2, 1); // for next pattern element, skip distance -1 ++ bgtz(ch2, BCLOOP); ++ ++ // tmp6: pattern end, address after needle ++ shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); ++ if (needle_isL == haystack_isL) { ++ // load last 8 bytes (8LL/4UU symbols) ++ ld(tmp6, Address(tmp6, -wordSize)); ++ } else { ++ // UL: from UTF-16(source) search Latin1(pattern) ++ lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) ++ // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d ++ // We'll have to wait until load completed, but it's still faster than per-character loads+checks ++ srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a ++ slli(ch2, tmp6, XLEN - 24); ++ srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b ++ slli(ch1, tmp6, XLEN - 16); ++ srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c ++ andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d ++ slli(ch2, ch2, 16); ++ orr(ch2, ch2, ch1); // 0x00000b0c ++ slli(result, tmp3, 48); // use result as temp register ++ orr(tmp6, tmp6, result); // 0x0a00000d ++ slli(result, ch2, 16); ++ orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d ++ } ++ ++ // i = m - 1; ++ // skipch = j + i; ++ // if (skipch == pattern[m - 1] ++ // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); ++ // else ++ // move j with bad char offset table ++ bind(BMLOOPSTR2); ++ // compare pattern to source string backward ++ shadd(result, nlen_tmp, haystack, result, haystack_chr_shift); ++ (this->*haystack_load_1chr)(skipch, Address(result), noreg); ++ sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 ++ if (needle_isL == haystack_isL) { ++ // re-init tmp3. It's for free because it's executed in parallel with ++ // load above. 
Alternative is to initialize it before loop, but it'll ++ // affect performance on in-order systems with 2 or more ld/st pipelines ++ srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1] ++ } ++ if (!isLL) { // UU/UL case ++ slli(ch2, nlen_tmp, 1); // offsets in bytes ++ } ++ bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char ++ add(result, haystack, isLL ? nlen_tmp : ch2); ++ ld(ch2, Address(result)); // load 8 bytes from source string ++ mv(ch1, tmp6); ++ if (isLL) { ++ j(BMLOOPSTR1_AFTER_LOAD); ++ } else { ++ sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8 ++ j(BMLOOPSTR1_CMP); ++ } ++ ++ bind(BMLOOPSTR1); ++ shadd(ch1, nlen_tmp, needle, ch1, needle_chr_shift); ++ shadd(ch2, nlen_tmp, haystack, ch2, haystack_chr_shift); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ ++ bind(BMLOOPSTR1_AFTER_LOAD); ++ sub(nlen_tmp, nlen_tmp, 1); ++ bltz(nlen_tmp, BMLOOPSTR1_LASTCMP); ++ ++ bind(BMLOOPSTR1_CMP); ++ beq(ch1, ch2, BMLOOPSTR1); ++ ++ bind(BMSKIP); ++ if (!isLL) { ++ // if we've met UTF symbol while searching Latin1 pattern, then we can ++ // skip needle_len symbols ++ if (needle_isL != haystack_isL) { ++ mv(result_tmp, needle_len); ++ } else { ++ mv(result_tmp, 1); ++ } ++ mv(t0, ASIZE); ++ bgeu(skipch, t0, BMADV); ++ } ++ add(result_tmp, sp, skipch); ++ lbu(result_tmp, Address(result_tmp)); // load skip offset ++ ++ bind(BMADV); ++ sub(nlen_tmp, needle_len, 1); ++ // move haystack after bad char skip offset ++ shadd(haystack, result_tmp, haystack, result, haystack_chr_shift); ++ ble(haystack, haystack_end, BMLOOPSTR2); ++ add(sp, sp, ASIZE); ++ j(NOMATCH); ++ ++ bind(BMLOOPSTR1_LASTCMP); ++ bne(ch1, ch2, BMSKIP); ++ ++ bind(BMMATCH); ++ sub(result, haystack, orig_haystack); ++ if (!haystack_isL) { ++ srli(result, result, 1); ++ } ++ add(sp, sp, ASIZE); ++ j(DONE); ++ ++ bind(LINEARSTUB); ++ sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm ++ bltz(t0, LINEARSEARCH); ++ mv(result, zr); ++ RuntimeAddress stub = NULL; ++ if (isLL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll()); ++ assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated"); ++ } else if (needle_isL) { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul()); ++ assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated"); ++ } else { ++ stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu()); ++ assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated"); ++ } ++ trampoline_call(stub); ++ j(DONE); ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(LINEARSEARCH); ++ string_indexof_linearscan(haystack, needle, haystack_len, needle_len, tmp1, tmp2, tmp3, tmp4, -1, result, ae); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_indexof"); ++} ++ ++// string_indexof ++// result: x10 ++// src: x11 ++// src_count: x12 ++// pattern: x13 ++// pattern_count: x14 or 1/2/3/4 ++void MacroAssembler::string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae) ++{ ++ // Note: ++ // needle_con_cnt > 0 means needle_len register is invalid, needle length is constant ++ // for UU/LL: needle_con_cnt[1, 4], UL: needle_con_cnt = 1 ++ assert(needle_con_cnt <= 4, "Invalid 
needle constant count"); ++ assert(ae != StrIntrinsicNode::LU, "Invalid encoding"); ++ ++ Register ch1 = t0; ++ Register ch2 = t1; ++ Register hlen_neg = haystack_len, nlen_neg = needle_len; ++ Register nlen_tmp = tmp1, hlen_tmp = tmp2, result_tmp = tmp4; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ ++ bool needle_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL; ++ bool haystack_isL = ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::LU; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ ++ load_chr_insn needle_load_1chr = needle_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn haystack_load_1chr = haystack_isL ? (load_chr_insn)&MacroAssembler::lbu : ++ (load_chr_insn)&MacroAssembler::lhu; ++ load_chr_insn load_2chr = isLL ? (load_chr_insn)&MacroAssembler::lhu : (load_chr_insn)&MacroAssembler::lwu; ++ load_chr_insn load_4chr = isLL ? (load_chr_insn)&MacroAssembler::lwu : (load_chr_insn)&MacroAssembler::ld; ++ ++ Label DO1, DO2, DO3, MATCH, NOMATCH, DONE; ++ ++ Register first = tmp3; ++ ++ if (needle_con_cnt == -1) { ++ Label DOSHORT, FIRST_LOOP, STR2_NEXT, STR1_LOOP, STR1_NEXT; ++ ++ sub(t0, needle_len, needle_isL == haystack_isL ? 4 : 2); ++ bltz(t0, DOSHORT); ++ ++ (this->*needle_load_1chr)(first, Address(needle), noreg); ++ slli(t0, needle_len, needle_chr_shift); ++ add(needle, needle, t0); ++ neg(nlen_neg, t0); ++ slli(t0, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, t0); ++ neg(hlen_neg, t0); ++ ++ bind(FIRST_LOOP); ++ add(t0, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(t0), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(nlen_tmp, nlen_neg, needle_chr_size); ++ add(hlen_tmp, hlen_neg, haystack_chr_size); ++ bgez(nlen_tmp, MATCH); ++ ++ bind(STR1_NEXT); ++ add(ch1, needle, nlen_tmp); ++ (this->*needle_load_1chr)(ch1, Address(ch1), noreg); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ add(nlen_tmp, nlen_tmp, needle_chr_size); ++ add(hlen_tmp, hlen_tmp, haystack_chr_size); ++ bltz(nlen_tmp, STR1_NEXT); ++ j(MATCH); ++ ++ bind(DOSHORT); ++ if (needle_isL == haystack_isL) { ++ sub(t0, needle_len, 2); ++ bltz(t0, DO1); ++ bgtz(t0, DO3); ++ } ++ } ++ ++ if (needle_con_cnt == 4) { ++ Label CH1_LOOP; ++ (this->*load_4chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 4); ++ slli(tmp3, result_tmp, haystack_chr_shift); // result as tmp ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_4chr)(ch2, Address(ch2), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 2) { ++ Label CH1_LOOP; ++ BLOCK_COMMENT("string_indexof DO2 {"); ++ bind(DO2); ++ (this->*load_2chr)(ch1, Address(needle), noreg); ++ if (needle_con_cnt == 2) { ++ sub(result_tmp, haystack_len, 2); ++ } ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(CH1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, 
MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, CH1_LOOP); ++ j(NOMATCH); ++ BLOCK_COMMENT("} string_indexof DO2"); ++ } ++ ++ if ((needle_con_cnt == -1 && needle_isL == haystack_isL) || needle_con_cnt == 3) { ++ Label FIRST_LOOP, STR2_NEXT, STR1_LOOP; ++ BLOCK_COMMENT("string_indexof DO3 {"); ++ ++ bind(DO3); ++ (this->*load_2chr)(first, Address(needle), noreg); ++ (this->*needle_load_1chr)(ch1, Address(needle, 2 * needle_chr_size), noreg); ++ if (needle_con_cnt == 3) { ++ sub(result_tmp, haystack_len, 3); ++ } ++ slli(hlen_tmp, result_tmp, haystack_chr_shift); ++ add(haystack, haystack, hlen_tmp); ++ neg(hlen_neg, hlen_tmp); ++ ++ bind(FIRST_LOOP); ++ add(ch2, haystack, hlen_neg); ++ (this->*load_2chr)(ch2, Address(ch2), noreg); ++ beq(first, ch2, STR1_LOOP); ++ ++ bind(STR2_NEXT); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, FIRST_LOOP); ++ j(NOMATCH); ++ ++ bind(STR1_LOOP); ++ add(hlen_tmp, hlen_neg, 2 * haystack_chr_size); ++ add(ch2, haystack, hlen_tmp); ++ (this->*haystack_load_1chr)(ch2, Address(ch2), noreg); ++ bne(ch1, ch2, STR2_NEXT); ++ j(MATCH); ++ BLOCK_COMMENT("} string_indexof DO3"); ++ } ++ ++ if (needle_con_cnt == -1 || needle_con_cnt == 1) { ++ Label DO1_LOOP; ++ ++ BLOCK_COMMENT("string_indexof DO1 {"); ++ bind(DO1); ++ (this->*needle_load_1chr)(ch1, Address(needle), noreg); ++ sub(result_tmp, haystack_len, 1); ++ mv(tmp3, result_tmp); ++ if (haystack_chr_shift) { ++ slli(tmp3, result_tmp, haystack_chr_shift); ++ } ++ add(haystack, haystack, tmp3); ++ neg(hlen_neg, tmp3); ++ ++ bind(DO1_LOOP); ++ add(tmp3, haystack, hlen_neg); ++ (this->*haystack_load_1chr)(ch2, Address(tmp3), noreg); ++ beq(ch1, ch2, MATCH); ++ add(hlen_neg, hlen_neg, haystack_chr_size); ++ blez(hlen_neg, DO1_LOOP); ++ BLOCK_COMMENT("} string_indexof DO1"); ++ } ++ ++ bind(NOMATCH); ++ mv(result, -1); ++ j(DONE); ++ ++ bind(MATCH); ++ srai(t0, hlen_neg, haystack_chr_shift); ++ add(result, result_tmp, t0); ++ ++ bind(DONE); ++} ++ ++void MacroAssembler::element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE) { ++ Label loop; ++ Assembler::SEW sew = islatin ? Assembler::e8 : Assembler::e16; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt, sew, Assembler::m2); ++ vlex_v(vr1, a1, sew); ++ vlex_v(vr2, a2, sew); ++ vmsne_vv(vrs, vr1, vr2); ++ vfirst_m(tmp2, vrs); ++ bgez(tmp2, DONE); ++ sub(cnt, cnt, tmp1); ++ if (!islatin) { ++ slli(tmp1, tmp1, 1); // get byte counts ++ } ++ add(a1, a1, tmp1); ++ add(a2, a2, tmp1); ++ bnez(cnt, loop); ++ ++ mv(result, true); ++} ++ ++void MacroAssembler::string_equals_v(Register a1, Register a2, Register result, Register cnt, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ ++ BLOCK_COMMENT("string_equals_v {"); ++ ++ mv(result, false); ++ ++ if (elem_size == 2) { ++ srli(cnt, cnt, 1); ++ } ++ ++ element_compare(a1, a2, result, cnt, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} string_equals_v"); ++} ++ ++// used by C2 ClearArray patterns. ++// base: Address of a buffer to be zeroed ++// cnt: Count in HeapWords ++// ++// base, cnt, v0, v1 and t0 are clobbered. 
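++// Strip-mined RVV loop, roughly:
++//   while (cnt > 0) { vl = vsetvli(cnt, e64, m4); store vl zero words at base; base += vl words; cnt -= vl; }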
++void MacroAssembler::clear_array_v(Register base, Register cnt) { ++ Label loop; ++ ++ // making zero words ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vxor_vv(v0, v0, v0); ++ ++ bind(loop); ++ vsetvli(t0, cnt, Assembler::e64, Assembler::m4); ++ vse64_v(v0, base); ++ sub(cnt, cnt, t0); ++ shadd(base, t0, base, t0, 3); ++ bnez(cnt, loop); ++} ++ ++void MacroAssembler::arrays_equals_v(Register a1, Register a2, Register result, ++ Register cnt1, int elem_size) { ++ Label DONE; ++ Register tmp1 = t0; ++ Register tmp2 = t1; ++ Register cnt2 = tmp2; ++ int length_offset = arrayOopDesc::length_offset_in_bytes(); ++ int base_offset = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE); ++ ++ BLOCK_COMMENT("arrays_equals_v {"); ++ ++ // if (a1 == a2), return true ++ mv(result, true); ++ oop_equal(a1, a2, DONE); ++ ++ mv(result, false); ++ // if a1 == null or a2 == null, return false ++ beqz(a1, DONE); ++ beqz(a2, DONE); ++ // if (a1.length != a2.length), return false ++ lwu(cnt1, Address(a1, length_offset)); ++ lwu(cnt2, Address(a2, length_offset)); ++ bne(cnt1, cnt2, DONE); ++ ++ la(a1, Address(a1, base_offset)); ++ la(a2, Address(a2, base_offset)); ++ ++ element_compare(a1, a2, result, cnt1, tmp1, tmp2, v0, v2, v0, elem_size == 1, DONE); ++ ++ bind(DONE); ++ ++ BLOCK_COMMENT("} arrays_equals_v"); ++} ++ ++void MacroAssembler::string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2, ++ Register result, Register tmp1, Register tmp2, int encForm) { ++ Label DIFFERENCE, DONE, L, loop; ++ bool encLL = encForm == StrIntrinsicNode::LL; ++ bool encLU = encForm == StrIntrinsicNode::LU; ++ bool encUL = encForm == StrIntrinsicNode::UL; ++ ++ bool str1_isL = encLL || encLU; ++ bool str2_isL = encLL || encUL; ++ ++ int minCharsInWord = encLL ? wordSize : wordSize / 2; ++ ++ BLOCK_COMMENT("string_compare {"); ++ ++ // for Lating strings, 1 byte for 1 character ++ // for UTF16 strings, 2 bytes for 1 character ++ if (!str1_isL) ++ sraiw(cnt1, cnt1, 1); ++ if (!str2_isL) ++ sraiw(cnt2, cnt2, 1); ++ ++ // if str1 == str2, return the difference ++ // save the minimum of the string lengths in cnt2. ++ sub(result, cnt1, cnt2); ++ bgt(cnt1, cnt2, L); ++ mv(cnt2, cnt1); ++ bind(L); ++ ++ if (str1_isL == str2_isL) { // LL or UU ++ element_compare(str1, str2, zr, cnt2, tmp1, tmp2, v2, v4, v1, encLL, DIFFERENCE); ++ j(DONE); ++ } else { // LU or UL ++ Register strL = encLU ? str1 : str2; ++ Register strU = encLU ? str2 : str1; ++ VectorRegister vstr1 = encLU ? v4 : v0; ++ VectorRegister vstr2 = encLU ? v0 : v4; ++ ++ bind(loop); ++ vsetvli(tmp1, cnt2, Assembler::e8, Assembler::m2); ++ vle8_v(vstr1, strL); ++ vsetvli(tmp1, cnt2, Assembler::e16, Assembler::m4); ++ vzext_vf2(vstr2, vstr1); ++ vle16_v(vstr1, strU); ++ vmsne_vv(v0, vstr2, vstr1); ++ vfirst_m(tmp2, v0); ++ bgez(tmp2, DIFFERENCE); ++ sub(cnt2, cnt2, tmp1); ++ add(strL, strL, tmp1); ++ shadd(strU, tmp1, strU, tmp1, 1); ++ bnez(cnt2, loop); ++ j(DONE); ++ } ++ bind(DIFFERENCE); ++ slli(tmp1, tmp2, 1); ++ add(str1, str1, str1_isL ? tmp2 : tmp1); ++ add(str2, str2, str2_isL ? tmp2 : tmp1); ++ str1_isL ? lbu(tmp1, Address(str1, 0)) : lhu(tmp1, Address(str1, 0)); ++ str2_isL ? 
lbu(tmp2, Address(str2, 0)) : lhu(tmp2, Address(str2, 0)); ++ sub(result, tmp1, tmp2); ++ ++ bind(DONE); ++} ++ ++address MacroAssembler::byte_array_inflate_v(Register src, Register dst, Register len, Register tmp) { ++ Label loop; ++ assert_different_registers(src, dst, len, tmp, t0); ++ ++ BLOCK_COMMENT("byte_array_inflate_v {"); ++ bind(loop); ++ vsetvli(tmp, len, Assembler::e8, Assembler::m2); ++ vle8_v(v2, src); ++ vsetvli(t0, len, Assembler::e16, Assembler::m4); ++ vzext_vf2(v0, v2); ++ vse16_v(v0, dst); ++ sub(len, len, tmp); ++ add(src, src, tmp); ++ shadd(dst, tmp, dst, tmp, 1); ++ bnez(len, loop); ++ BLOCK_COMMENT("} byte_array_inflate_v"); ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// Compress char[] array to byte[]. ++// result: the array length if every element in array can be encoded; 0, otherwise. ++void MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label done; ++ encode_iso_array_v(src, dst, len, result, tmp); ++ beqz(len, done); ++ mv(result, zr); ++ bind(done); ++} ++ ++// result: the number of elements had been encoded. ++void MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) { ++ Label loop, DIFFERENCE, DONE; ++ ++ BLOCK_COMMENT("encode_iso_array_v {"); ++ mv(result, 0); ++ ++ bind(loop); ++ mv(tmp, 0xff); ++ vsetvli(t0, len, Assembler::e16, Assembler::m2); ++ vle16_v(v2, src); ++ // if element > 0xff, stop ++ vmsgtu_vx(v1, v2, tmp); ++ vfirst_m(tmp, v1); ++ vmsbf_m(v0, v1); ++ // compress char to byte ++ vsetvli(t0, len, Assembler::e8); ++ vncvt_x_x_w(v1, v2, Assembler::v0_t); ++ vse8_v(v1, dst, Assembler::v0_t); ++ ++ bgez(tmp, DIFFERENCE); ++ add(result, result, t0); ++ add(dst, dst, t0); ++ sub(len, len, t0); ++ shadd(src, t0, src, t0, 1); ++ bnez(len, loop); ++ j(DONE); ++ ++ bind(DIFFERENCE); ++ add(result, result, tmp); ++ ++ bind(DONE); ++ BLOCK_COMMENT("} encode_iso_array_v"); ++} ++ ++address MacroAssembler::has_negatives_v(Register ary, Register len, Register result, Register tmp) { ++ Label loop, DONE; ++ ++ mv(result, true); ++ ++ bind(loop); ++ vsetvli(t0, len, Assembler::e8, Assembler::m4); ++ vle8_v(v0, ary); ++ // if element highest bit is set, return true ++ vmslt_vx(v0, v0, zr); ++ vfirst_m(tmp, v0); ++ bgez(tmp, DONE); ++ ++ sub(len, len, t0); ++ add(ary, ary, t0); ++ bnez(len, loop); ++ mv(result, false); ++ ++ bind(DONE); ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// string indexof ++// compute index by trailing zeros ++void MacroAssembler::compute_index(Register haystack, Register trailing_zero, ++ Register match_mask, Register result, ++ Register ch2, Register tmp, ++ bool haystack_isL) ++{ ++ int haystack_chr_shift = haystack_isL ? 
0 : 1; ++ srl(match_mask, match_mask, trailing_zero); ++ srli(match_mask, match_mask, 1); ++ srli(tmp, trailing_zero, LogBitsPerByte); ++ if (!haystack_isL) andi(tmp, tmp, 0xE); ++ add(haystack, haystack, tmp); ++ ld(ch2, Address(haystack)); ++ if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift); ++ add(result, result, tmp); ++} ++ ++// string indexof ++// Find pattern element in src, compute match mask, ++// only the first occurrence of 0x80/0x8000 at low bits is the valid match index ++// match mask patterns would be like: ++// - 0x8080808080808080 (Latin1) ++// - 0x8000800080008000 (UTF16) ++void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2) ++{ ++ xorr(src, pattern, src); ++ sub(match_mask, src, mask1); ++ orr(src, src, mask2); ++ notr(src, src); ++ andr(match_mask, match_mask, src); ++} ++ ++// add two unsigned input and output carry ++void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ assert_different_registers(dst, src2); ++ add(dst, src1, src2); ++ sltu(carry, dst, src2); ++} ++ ++// add two input with carry ++void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, carry); ++ add(dst, src1, src2); ++ add(dst, dst, carry); ++} ++ ++// add two unsigned input with carry and output carry ++void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry) ++{ ++ assert_different_registers(dst, src2); ++ adc(dst, src1, src2, carry); ++ sltu(carry, dst, src2); ++} ++ ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry) ++{ ++ cad(dest_lo, dest_lo, src1, carry); ++ add(dest_hi, dest_hi, carry); ++ cad(dest_lo, dest_lo, src2, carry); ++ add(final_dest_hi, dest_hi, carry); ++} ++ ++// Code for BigInteger::mulAdd instrinsic ++// out = x10 ++// in = x11 ++// offset = x12 (already out.length-offset) ++// len = x13 ++// k = x14 ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp1, Register tmp2) { ++ Label L_loop_1, L_loop_2, L_end, L_not_zero; ++ bnez(len, L_not_zero); ++ mv(out, zr); ++ j(L_end); ++ bind(L_not_zero); ++ zero_extend(k, k, 32); ++ shadd(offset, offset, out, t0, LogBytesPerInt); ++ shadd(in, len, in, t0, LogBytesPerInt); ++ mv(out, zr); ++ ++ if (AvoidUnalignedAccesses) { ++ // if in and offset are both 8 bytes aligned. 
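++ // A non-zero result of or-ing the two pointers' low bits means at least one of them is not 8-byte aligned, so stay on the 32-bit-per-step loop (L_loop_1) below; otherwise jump to the 64-bit loop.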
++ orr(t0, in, offset); ++ andi(t0, t0, 0x7); ++ beqz(t0, L_loop_2); ++ } else { ++ j(L_loop_2); ++ } ++ ++ bind(L_loop_1); ++ sub(in, in, 4); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, 4); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset)); ++ srli(out, t0, 32); ++ sub(len, len, 1); ++ beqz(len, L_end); ++ j(L_loop_1); ++ ++ ++ bind(L_loop_2); ++ Label L_one; ++ sub(len, len, 1); ++ bltz(len, L_end); ++ sub(len, len, 1); ++ bltz(len, L_one); ++ ++ sub(in, in, 8); ++ ld(tmp1, Address(in, 0)); ++ ror_imm(tmp1, tmp1, 32); // convert to little-endian ++ ++ const Register carry = out; ++ const Register src1_hi = t0; ++ const Register src1_lo = tmp2; ++ const Register src2 = t1; ++ ++ mulhu(src1_hi, k, tmp1); ++ mul(src1_lo, k, tmp1); ++ sub(offset, offset, 8); ++ ld(src2, Address(offset, 0)); ++ ror_imm(src2, src2, 32, tmp1); ++ add2_with_carry(carry, src1_hi, src1_lo, carry, src2, tmp1); ++ ror_imm(src1_lo, src1_lo, 32, tmp1); // back to big-endian ++ sd(src1_lo, Address(offset, 0)); ++ j(L_loop_2); ++ ++ bind(L_one); ++ sub(in, in, 4); ++ lwu(t0, Address(in, 0)); ++ mul(t1, t0, k); ++ add(t0, t1, out); ++ sub(offset, offset, 4); ++ lwu(t1, Address(offset, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(offset)); ++ srli(out, t0, 32); ++ ++ bind(L_end); ++} ++ ++/** ++ * Multiply 32 bit by 32 bit first loop. ++ */ ++void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // long carry = 0; ++ // for (int j=ystart, k=ystart+1+xstart; j >= 0; j--, k--) { ++ // long product = (y[j] & LONG_MASK) * ++ // (x[xstart] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[xstart] = (int)carry; ++ ++ Label L_first_loop, L_first_loop_exit; ++ ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(x_xstart, Address(t0, 0)); ++ ++ bind(L_first_loop); ++ sub(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(y_idx, Address(t0, 0)); ++ mul(product, x_xstart, y_idx); ++ add(product, product, carry); ++ srli(carry, product, 32); ++ sub(kdx, kdx, 1); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(product, Address(t0, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); ++} ++ ++/** ++ * Multiply 64 bit by 64 bit first loop. 
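++ * x_xstart holds one 64-bit (two-digit) chunk of x in little-endian digit order;
++ * each iteration multiplies it by a 64-bit chunk of y, adds the running carry,
++ * and stores the low 64 bits back into z in big-endian digit order.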
++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) ++{ ++ // ++ // jlong carry, x[], y[], z[]; ++ // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { ++ // huge_128 product = y[idx] * x[xstart] + carry; ++ // z[kdx] = (jlong)product; ++ // carry = (jlong)(product >>> 64); ++ // } ++ // z[xstart] = carry; ++ // ++ ++ Label L_first_loop, L_first_loop_exit; ++ Label L_one_x, L_one_y, L_multiply; ++ ++ sub(xstart, xstart, 1); ++ bltz(xstart, L_one_x); ++ ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(x_xstart, Address(t0, 0)); ++ ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian ++ ++ bind(L_first_loop); ++ sub(idx, idx, 1); ++ bltz(idx, L_first_loop_exit); ++ sub(idx, idx, 1); ++ bltz(idx, L_one_y); ++ ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(y_idx, Address(t0, 0)); ++ ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian ++ bind(L_multiply); ++ ++ mulhu(t0, x_xstart, y_idx); ++ mul(product, x_xstart, y_idx); ++ cad(product, product, carry, t1); ++ adc(carry, t0, zr, t1); ++ ++ sub(kdx, kdx, 2); ++ ror_imm(product, product, 32); // back to big-endian ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sd(product, Address(t0, 0)); ++ ++ j(L_first_loop); ++ ++ bind(L_one_y); ++ lwu(y_idx, Address(y, 0)); ++ j(L_multiply); ++ ++ bind(L_one_x); ++ lwu(x_xstart, Address(x, 0)); ++ j(L_first_loop); ++ ++ bind(L_first_loop_exit); ++} ++ ++/** ++ * Multiply 128 bit by 128. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) ++{ ++ // jlong carry, x[], y[], z[]; ++ // int kdx = xstart+1; ++ // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop ++ // huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry; ++ // jlong carry2 = (jlong)(tmp3 >>> 64); ++ // huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2; ++ // carry = (jlong)(tmp4 >>> 64); ++ // z[kdx+idx+1] = (jlong)tmp3; ++ // z[kdx+idx] = (jlong)tmp4; ++ // } ++ // idx += 2; ++ // if (idx > 0) { ++ // yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry; ++ // z[kdx+idx] = (jlong)yz_idx1; ++ // carry = (jlong)(yz_idx1 >>> 64); ++ // } ++ // ++ ++ Label L_third_loop, L_third_loop_exit, L_post_third_loop_done; ++ ++ srli(jdx, idx, 2); ++ ++ bind(L_third_loop); ++ ++ sub(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ sub(idx, idx, 4); ++ ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ld(yz_idx1, Address(t0, wordSize)); ++ ++ shadd(tmp6, idx, z, t0, LogBytesPerInt); ++ ++ ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian ++ ror_imm(yz_idx2, yz_idx2, 32); ++ ++ ld(t1, Address(tmp6, 0)); ++ ld(t0, Address(tmp6, wordSize)); ++ ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); ++ ++ ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian ++ ror_imm(t1, t1, 32, tmp); ++ ++ mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp ++ mulhu(carry2, product_hi, yz_idx2); ++ ++ cad(tmp3, tmp3, carry, carry); ++ adc(tmp4, tmp4, zr, carry); ++ cad(tmp3, tmp3, t0, t0); ++ cadc(tmp4, tmp4, tmp, t0); ++ adc(carry, carry2, zr, t0); ++ cad(tmp4, tmp4, t1, carry2); 
++ adc(carry, carry, zr, carry2); ++ ++ ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian ++ ror_imm(tmp4, tmp4, 32); ++ sd(tmp4, Address(tmp6, 0)); ++ sd(tmp3, Address(tmp6, wordSize)); ++ ++ j(L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ++ andi(idx, idx, 0x3); ++ beqz(idx, L_post_third_loop_done); ++ ++ Label L_check_1; ++ sub(idx, idx, 2); ++ bltz(idx, L_check_1); ++ ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ ld(yz_idx1, Address(t0, 0)); ++ ror_imm(yz_idx1, yz_idx1, 32); ++ ++ mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3 ++ mulhu(tmp4, product_hi, yz_idx1); ++ ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ ld(yz_idx2, Address(t0, 0)); ++ ror_imm(yz_idx2, yz_idx2, 32, tmp); ++ ++ add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp); ++ ++ ror_imm(tmp3, tmp3, 32, tmp); ++ sd(tmp3, Address(t0, 0)); ++ ++ bind(L_check_1); ++ ++ andi(idx, idx, 0x1); ++ sub(idx, idx, 1); ++ bltz(idx, L_post_third_loop_done); ++ shadd(t0, idx, y, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3 ++ mulhu(carry2, tmp4, product_hi); ++ ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ lwu(tmp4, Address(t0, 0)); ++ ++ add2_with_carry(carry2, carry2, tmp3, tmp4, carry); ++ ++ shadd(t0, idx, z, t0, LogBytesPerInt); ++ sw(tmp3, Address(t0, 0)); ++ slli(t0, carry2, 32); ++ srli(carry, tmp3, 32); ++ orr(carry, carry, t0); ++ ++ bind(L_post_third_loop_done); ++} ++ ++/** ++ * Code for BigInteger::multiplyToLen() instrinsic. ++ * ++ * x10: x ++ * x11: xlen ++ * x12: y ++ * x13: ylen ++ * x14: z ++ * x15: zlen ++ * x16: tmp1 ++ * x17: tmp2 ++ * x7: tmp3 ++ * x28: tmp4 ++ * x29: tmp5 ++ * x30: tmp6 ++ * x31: tmp7 ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) ++{ ++ assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); ++ ++ const Register idx = tmp1; ++ const Register kdx = tmp2; ++ const Register xstart = tmp3; ++ ++ const Register y_idx = tmp4; ++ const Register carry = tmp5; ++ const Register product = xlen; ++ const Register x_xstart = zlen; // reuse register ++ ++ mv(idx, ylen); // idx = ylen; ++ mv(kdx, zlen); // kdx = xlen+ylen; ++ mv(carry, zr); // carry = 0; ++ ++ Label L_multiply_64_or_128, L_done; ++ ++ sub(xstart, xlen, 1); ++ bltz(xstart, L_done); ++ ++ const Register jdx = tmp1; ++ ++ if (AvoidUnalignedAccesses) { ++ // if x and y are both 8 bytes aligend. 
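++    // Only when both xlen and ylen are even can the 64-bit loads in the
++    // 64/128-bit loops stay 8-byte aligned; if either length is odd, fall
++    // through to the plain 32x32 loop below instead.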
++ orr(t0, xlen, ylen); ++ andi(t0, t0, 0x1); ++ beqz(t0, L_multiply_64_or_128); ++ } else { ++ j(L_multiply_64_or_128); ++ } ++ ++ multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ Label L_second_loop_1; ++ bind(L_second_loop_1); ++ mv(carry, zr); ++ mv(jdx, ylen); ++ sub(xstart, xstart, 1); ++ bltz(xstart, L_done); ++ sub(sp, sp, 2 * wordSize); ++ sd(z, Address(sp, 0)); ++ sd(zr, Address(sp, wordSize)); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ lwu(product, Address(t0, 0)); ++ Label L_third_loop, L_third_loop_exit; ++ ++ bind(L_third_loop); ++ sub(jdx, jdx, 1); ++ bltz(jdx, L_third_loop_exit); ++ ++ shadd(t0, jdx, y, t0, LogBytesPerInt); ++ lwu(t0, Address(t0, 0)); ++ mul(t1, t0, product); ++ add(t0, t1, carry); ++ shadd(tmp6, jdx, z, t1, LogBytesPerInt); ++ lwu(t1, Address(tmp6, 0)); ++ add(t0, t0, t1); ++ sw(t0, Address(tmp6, 0)); ++ srli(carry, t0, 32); ++ j(L_third_loop); ++ ++ bind(L_third_loop_exit); ++ ld(z, Address(sp, 0)); ++ addi(sp, sp, 2 * wordSize); ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ j(L_second_loop_1); ++ ++ bind(L_multiply_64_or_128); ++ multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx); ++ ++ Label L_second_loop_2; ++ beqz(kdx, L_second_loop_2); ++ ++ Label L_carry; ++ sub(kdx, kdx, 1); ++ beqz(kdx, L_carry); ++ ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ srli(carry, carry, 32); ++ sub(kdx, kdx, 1); ++ ++ bind(L_carry); ++ shadd(t0, kdx, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ // Second and third (nested) loops. ++ // ++ // for (int i = xstart-1; i >= 0; i--) { // Second loop ++ // carry = 0; ++ // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop ++ // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + ++ // (z[k] & LONG_MASK) + carry; ++ // z[k] = (int)product; ++ // carry = product >>> 32; ++ // } ++ // z[i] = (int)carry; ++ // } ++ // ++ // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi ++ ++ bind(L_second_loop_2); ++ mv(carry, zr); // carry = 0; ++ mv(jdx, ylen); // j = ystart+1 ++ ++ sub(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_done); ++ ++ sub(sp, sp, 4 * wordSize); ++ sd(z, Address(sp, 0)); ++ ++ Label L_last_x; ++ shadd(t0, xstart, z, t0, LogBytesPerInt); ++ addi(z, t0, 4); ++ sub(xstart, xstart, 1); // i = xstart-1; ++ bltz(xstart, L_last_x); ++ ++ shadd(t0, xstart, x, t0, LogBytesPerInt); ++ ld(product_hi, Address(t0, 0)); ++ ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian ++ ++ Label L_third_loop_prologue; ++ bind(L_third_loop_prologue); ++ ++ sd(ylen, Address(sp, wordSize)); ++ sd(x, Address(sp, 2 * wordSize)); ++ sd(xstart, Address(sp, 3 * wordSize)); ++ multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product, ++ tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi); ++ ld(z, Address(sp, 0)); ++ ld(ylen, Address(sp, wordSize)); ++ ld(x, Address(sp, 2 * wordSize)); ++ ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen ++ addi(sp, sp, 4 * wordSize); ++ ++ addi(tmp3, xlen, 1); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ ++ sub(tmp3, tmp3, 1); ++ bltz(tmp3, L_done); ++ ++ // z[i] = (int) carry; ++ srli(carry, carry, 32); ++ shadd(t0, tmp3, z, t0, LogBytesPerInt); ++ sw(carry, Address(t0, 0)); ++ j(L_second_loop_2); ++ ++ // Next 
infrequent code is moved outside loops. ++ bind(L_last_x); ++ lwu(product_hi, Address(x, 0)); ++ j(L_third_loop_prologue); ++ ++ bind(L_done); ++} ++#endif // COMPILER2 ++ ++// Count bits of trailing zero chars from lsb to msb until first non-zero element. ++// For LL case, one byte for one element, so shift 8 bits once, and for other case, ++// shift 16 bits once. ++void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2) ++{ ++ if (UseZbb) { ++ assert_different_registers(Rd, Rs, tmp1); ++ int step = isLL ? 8 : 16; ++ ctz(Rd, Rs); ++ andi(tmp1, Rd, step - 1); ++ sub(Rd, Rd, tmp1); ++ return; ++ } ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ Label Loop; ++ int step = isLL ? 8 : 16; ++ mv(Rd, -step); ++ mv(tmp2, Rs); ++ ++ bind(Loop); ++ addi(Rd, Rd, step); ++ andi(tmp1, tmp2, ((1 << step) - 1)); ++ srli(tmp2, tmp2, step); ++ beqz(tmp1, Loop); ++} ++ ++// This instruction reads adjacent 4 bytes from the lower half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A300A200A100A0 ++void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ mv(tmp1, 0xFF000000); // first byte mask at lower word ++ andr(Rd, Rs, tmp1); ++ for (int i = 0; i < 2; i++) { ++ slli(Rd, Rd, wordSize); ++ srli(tmp1, tmp1, wordSize); ++ andr(tmp2, Rs, tmp1); ++ orr(Rd, Rd, tmp2); ++ } ++ slli(Rd, Rd, wordSize); ++ andi(tmp2, Rs, 0xFF); // last byte mask at lower word ++ orr(Rd, Rd, tmp2); ++} ++ ++// This instruction reads adjacent 4 bytes from the upper half of source register, ++// inflate into a register, for example: ++// Rs: A7A6A5A4A3A2A1A0 ++// Rd: 00A700A600A500A4 ++void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2) ++{ ++ assert_different_registers(Rd, Rs, tmp1, tmp2); ++ srli(Rs, Rs, 32); // only upper 32 bits are needed ++ inflate_lo32(Rd, Rs, tmp1, tmp2); ++} ++ ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; ++ ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. ++ ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, and t0 are clobbered. 
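++// When cnt is at least zero_words_block_size, the bulk of the work is handed
++// off to the zero_blocks stub (or reached by a direct jal while the stubs are
++// still being generated); the unrolled stores below then clear the remaining
++// words selected by the low bits of cnt.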
++address MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ assert(is_power_of_2(zero_words_block_size), "adjust this"); ++ assert(ptr == x28 && cnt == x29, "mismatch in register usage"); ++ assert_different_registers(cnt, t0); ++ ++ BLOCK_COMMENT("zero_words {"); ++ mv(t0, zero_words_block_size); ++ Label around, done, done16; ++ bltu(cnt, t0, around); ++ { ++ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks()); ++ assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); ++ if (StubRoutines::riscv::complete()) { ++ address tpc = trampoline_call(zero_blocks); ++ if (tpc == NULL) { ++ DEBUG_ONLY(reset_labels1(around)); ++ postcond(pc() == badAddress); ++ return NULL; ++ } ++ } else { ++ jal(zero_blocks); ++ } ++ } ++ bind(around); ++ for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) { ++ Label l; ++ andi(t0, cnt, i); ++ beqz(t0, l); ++ for (int j = 0; j < i; j++) { ++ sd(zr, Address(ptr, 0)); ++ addi(ptr, ptr, 8); ++ } ++ bind(l); ++ } ++ { ++ Label l; ++ andi(t0, cnt, 1); ++ beqz(t0, l); ++ sd(zr, Address(ptr, 0)); ++ bind(l); ++ } ++ BLOCK_COMMENT("} zero_words"); ++ postcond(pc() != badAddress); ++ return pc(); ++} ++ ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. ++#define SmallArraySize (18 * BytesPerLong) ++void MacroAssembler::zero_words(Register base, uint64_t cnt) ++{ ++ assert_different_registers(base, t0, t1); ++ ++ BLOCK_COMMENT("zero_words {"); ++ ++ if (cnt <= SmallArraySize / BytesPerLong) { ++ for (int i = 0; i < (int)cnt; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ } else { ++ const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll ++ int remainder = cnt % unroll; ++ for (int i = 0; i < remainder; i++) { ++ sd(zr, Address(base, i * wordSize)); ++ } ++ ++ Label loop; ++ Register cnt_reg = t0; ++ Register loop_base = t1; ++ cnt = cnt - remainder; ++ mv(cnt_reg, cnt); ++ add(loop_base, base, remainder * wordSize); ++ bind(loop); ++ sub(cnt_reg, cnt_reg, unroll); ++ for (int i = 0; i < unroll; i++) { ++ sd(zr, Address(loop_base, i * wordSize)); ++ } ++ add(loop_base, loop_base, unroll * wordSize); ++ bnez(cnt_reg, loop); ++ } ++ BLOCK_COMMENT("} zero_words"); ++} ++ ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++// Algorithm: ++// ++// t0 = cnt & 7 ++// cnt -= t0 ++// p += t0 ++// switch (t0): ++// switch start: ++// do while cnt ++// cnt -= 8 ++// p[-8] = value ++// case 7: ++// p[-7] = value ++// case 6: ++// p[-6] = value ++// // ... ++// case 1: ++// p[-1] = value ++// case 0: ++// p += 8 ++// do-while end ++// switch end ++ ++ assert_different_registers(base, cnt, value, t0, t1); ++ ++ Label fini, skip, entry, loop; ++ const int unroll = 8; // Number of sd instructions we'll unroll ++ ++ beqz(cnt, fini); ++ ++ andi(t0, cnt, unroll - 1); ++ sub(cnt, cnt, t0); ++ // align 8, so first sd n % 8 = mod, next loop sd 8 * n. 
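++  // Computed entry into the unrolled loop: each sd below is a 4-byte
++  // instruction, so jumping (cnt % 8) * 4 bytes before 'entry' executes
++  // exactly cnt % 8 stores on the first pass; base was pre-advanced by
++  // (cnt % 8) * 8, so their negative offsets cover the leading words.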
++ shadd(base, t0, base, t1, 3); ++ la(t1, entry); ++ slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst) ++ sub(t1, t1, t0); ++ jr(t1); ++ ++ bind(loop); ++ add(base, base, unroll * 8); ++ for (int i = -unroll; i < 0; i++) { ++ sd(value, Address(base, i * 8)); ++ } ++ bind(entry); ++ sub(cnt, cnt, unroll); ++ bgez(cnt, loop); ++ ++ bind(fini); ++} ++ ++#define FCVT_SAFE(FLOATCVT, FLOATEQ) \ ++void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \ ++ Label L_Okay; \ ++ fscsr(zr); \ ++ FLOATCVT(dst, src); \ ++ frcsr(tmp); \ ++ andi(tmp, tmp, 0x1E); \ ++ beqz(tmp, L_Okay); \ ++ FLOATEQ(tmp, src, src); \ ++ bnez(tmp, L_Okay); \ ++ mv(dst, zr); \ ++ bind(L_Okay); \ ++} ++ ++FCVT_SAFE(fcvt_w_s, feq_s) ++FCVT_SAFE(fcvt_l_s, feq_s) ++FCVT_SAFE(fcvt_w_d, feq_d) ++FCVT_SAFE(fcvt_l_d, feq_d) ++ ++#undef FCVT_SAFE ++ ++#define FCMP(FLOATTYPE, FLOATSIG) \ ++void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1, \ ++ FloatRegister Rs2, int unordered_result) { \ ++ Label Ldone; \ ++ if (unordered_result < 0) { \ ++ /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */ \ ++ /* installs 1 if gt else 0 */ \ ++ flt_##FLOATSIG(result, Rs2, Rs1); \ ++ /* Rs1 > Rs2, install 1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 < Rs2, install -1 */ \ ++ bind(Ldone); \ ++ } else { \ ++ /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */ \ ++ /* installs 1 if gt or unordered else 0 */ \ ++ flt_##FLOATSIG(result, Rs1, Rs2); \ ++ /* Rs1 < Rs2, install -1 */ \ ++ bgtz(result, Ldone); \ ++ feq_##FLOATSIG(result, Rs1, Rs2); \ ++ addi(result, result, -1); \ ++ /* Rs1 = Rs2, install 0 */ \ ++ /* NaN or Rs1 > Rs2, install 1 */ \ ++ bind(Ldone); \ ++ neg(result, result); \ ++ } \ ++} ++ ++FCMP(float, s); ++FCMP(double, d); ++ ++#undef FCMP ++ ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) { ++ assert_different_registers(addr, len, tmp1, t0, t1); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ andi(t0, len, BytesPerWord - 1); ++ beqz(t0, L); ++ stop("len is not a multiple of BytesPerWord"); ++ bind(L); ++ } ++#endif // ASSERT ++ ++#ifndef PRODUCT ++ block_comment("zero memory"); ++#endif // PRODUCT ++ ++ Label loop; ++ Label entry; ++ ++ // Algorithm: ++ // ++ // t0 = cnt & 7 ++ // cnt -= t0 ++ // p += t0 ++ // switch (t0) { ++ // do { ++ // cnt -= 8 ++ // p[-8] = 0 ++ // case 7: ++ // p[-7] = 0 ++ // case 6: ++ // p[-6] = 0 ++ // ... 
++ // case 1: ++ // p[-1] = 0 ++ // case 0: ++ // p += 8 ++ // } while (cnt) ++ // } ++ ++ const int unroll = 8; // Number of sd(zr) instructions we'll unroll ++ ++ srli(len, len, LogBytesPerWord); ++ andi(t0, len, unroll - 1); // t0 = cnt % unroll ++ sub(len, len, t0); // cnt -= unroll ++ // tmp1 always points to the end of the region we're about to zero ++ shadd(tmp1, t0, addr, t1, LogBytesPerWord); ++ la(t1, entry); ++ slli(t0, t0, 2); ++ sub(t1, t1, t0); ++ jr(t1); ++ bind(loop); ++ sub(len, len, unroll); ++ for (int i = -unroll; i < 0; i++) { ++ Assembler::sd(zr, Address(tmp1, i * wordSize)); ++ } ++ bind(entry); ++ add(tmp1, tmp1, unroll * wordSize); ++ bnez(len, loop); ++} ++ ++// shift left by shamt and add ++// Rd = (Rs1 << shamt) + Rs2 ++void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) { ++ if (UseZba) { ++ if (shamt == 1) { ++ sh1add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 2) { ++ sh2add(Rd, Rs1, Rs2); ++ return; ++ } else if (shamt == 3) { ++ sh3add(Rd, Rs1, Rs2); ++ return; ++ } ++ } ++ ++ if (shamt != 0) { ++ slli(tmp, Rs1, shamt); ++ add(Rd, Rs2, tmp); ++ } else { ++ add(Rd, Rs1, Rs2); ++ } ++} ++ ++void MacroAssembler::zero_extend(Register dst, Register src, int bits) { ++ if (UseZba && bits == 32) { ++ zext_w(dst, src); ++ return; ++ } ++ ++ if (UseZbb && bits == 16) { ++ zext_h(dst, src); ++ return; ++ } ++ ++ if (bits == 8) { ++ zext_b(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srli(dst, dst, XLEN - bits); ++ } ++} ++ ++void MacroAssembler::sign_extend(Register dst, Register src, int bits) { ++ if (UseZbb) { ++ if (bits == 8) { ++ sext_b(dst, src); ++ return; ++ } else if (bits == 16) { ++ sext_h(dst, src); ++ return; ++ } ++ } ++ ++ if (bits == 32) { ++ sext_w(dst, src); ++ } else { ++ slli(dst, src, XLEN - bits); ++ srai(dst, dst, XLEN - bits); ++ } ++} ++ ++void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp) ++{ ++ if (src1 == src2) { ++ mv(dst, zr); ++ return; ++ } ++ Label done; ++ Register left = src1; ++ Register right = src2; ++ if (dst == src1) { ++ assert_different_registers(dst, src2, tmp); ++ mv(tmp, src1); ++ left = tmp; ++ } else if (dst == src2) { ++ assert_different_registers(dst, src1, tmp); ++ mv(tmp, src2); ++ right = tmp; ++ } ++ ++ // installs 1 if gt else 0 ++ slt(dst, right, left); ++ bnez(dst, done); ++ slt(dst, left, right); ++ // dst = -1 if lt; else if eq , dst = 0 ++ neg(dst, dst); ++ bind(done); ++} ++ ++void MacroAssembler::load_constant_pool_cache(Register cpool, Register method) ++{ ++ ld(cpool, Address(method, Method::const_offset())); ++ ld(cpool, Address(cpool, ConstMethod::constants_offset())); ++ ld(cpool, Address(cpool, ConstantPool::cache_offset_in_bytes())); ++} ++ ++void MacroAssembler::load_max_stack(Register dst, Register method) ++{ ++ ld(dst, Address(xmethod, Method::const_offset())); ++ lhu(dst, Address(dst, ConstMethod::max_stack_offset())); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. 
++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and ra ++ // This should really be in_preserve_stack_slots ++ return r->reg2stack() * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (riscv64 abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ ld(tmp, Address(fp, reg2offset_in(src.first()))); ++ sd(tmp, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ // 32bits extend sign ++ addw(dst.first()->as_Register(), src.first()->as_Register(), zr); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++void MacroAssembler::object_move(OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ assert_cond(map != NULL && receiver_offset != NULL); ++ // must pass a handle. First figure out the location we use as a handle ++ Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register(); ++ ++ // See if oop is NULL if it is we need no handle ++ ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ ++ ld(t0, Address(fp, reg2offset_in(src.first()))); ++ la(rHandle, Address(fp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ Label notZero1; ++ bnez(t0, notZero1); ++ mv(rHandle, zr); ++ bind(notZero1); ++ } else { ++ ++ // Oop is in a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot = -1; ++ if (rOop == j_rarg0) { ++ oop_slot = 0; ++ } else if (rOop == j_rarg1) { ++ oop_slot = 1; ++ } else if (rOop == j_rarg2) { ++ oop_slot = 2; ++ } else if (rOop == j_rarg3) { ++ oop_slot = 3; ++ } else if (rOop == j_rarg4) { ++ oop_slot = 4; ++ } else if (rOop == j_rarg5) { ++ oop_slot = 5; ++ } else if (rOop == j_rarg6) { ++ oop_slot = 6; ++ } else { ++ assert(rOop == j_rarg7, "wrong register"); ++ oop_slot = 7; ++ } ++ ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot * VMRegImpl::stack_slot_size; ++ ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ sd(rOop, Address(sp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ ++ //rOop maybe the same as rHandle ++ if (rOop == rHandle) { ++ Label isZero; ++ beqz(rOop, isZero); ++ la(rHandle, Address(sp, offset)); ++ bind(isZero); ++ } else { ++ 
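++      // rOop and rHandle are different registers here: materialize the handle
++      // address unconditionally, then clear it if the oop itself is NULL.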
Label notZero2; ++ la(rHandle, Address(sp, offset)); ++ bnez(rOop, notZero2); ++ mv(rHandle, zr); ++ bind(notZero2); ++ } ++ } ++ ++ // If arg is on the stack then place it otherwise it is already in correct reg. ++ if (dst.first()->is_stack()) { ++ sd(rHandle, Address(sp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || ++ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ lwu(tmp, Address(fp, reg2offset_in(src.first()))); ++ sw(tmp, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()->is_Register()) { ++ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// A long move ++void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ ld(tmp, Address(fp, reg2offset_in(src.first()))); ++ sd(tmp, Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ mv(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// A double move ++void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { ++ assert(src.first()->is_stack() && dst.first()->is_stack() || ++ src.first()->is_reg() && dst.first()->is_reg() || ++ src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error"); ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ ld(tmp, Address(fp, reg2offset_in(src.first()))); ++ sd(tmp, Address(sp, reg2offset_out(dst.first()))); ++ } else if (dst.first()-> is_Register()) { ++ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first()))); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (src.first() != dst.first()) { ++ if (src.is_single_phys_reg() && dst.is_single_phys_reg()) { ++ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void MacroAssembler::rt_call(address dest, Register tmp) { ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ far_call(RuntimeAddress(dest)); ++ } else { ++ int32_t offset = 0; ++ la_patchable(tmp, RuntimeAddress(dest), offset); ++ jalr(x1, tmp, offset); ++ } ++} +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +new file mode 100644 +index 000000000..a4d5ce0e0 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +@@ -0,0 +1,975 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "code/vmreg.hpp" ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) { ++ } ++ virtual ~MacroAssembler() {} ++ ++ void safepoint_poll(Label& slow_path); ++ void safepoint_poll_acquire(Label& slow_path); ++ ++ // Alignment ++ void align(int modulus); ++ ++ // Stack frame creation/removal ++ // Note that SP must be updated to the right place before saving/restoring RA and FP ++ // because signal based thread suspend/resume could happend asychronously ++ void enter() { ++ addi(sp, sp, - 2 * wordSize); ++ sd(ra, Address(sp, wordSize)); ++ sd(fp, Address(sp)); ++ addi(fp, sp, 2 * wordSize); ++ } ++ ++ void leave() { ++ addi(sp, fp, - 2 * wordSize); ++ ld(fp, Address(sp)); ++ ld(ra, Address(sp, wordSize)); ++ addi(sp, sp, 2 * wordSize); ++ } ++ ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
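++  // A typical call site (illustrative only) would look like
++  //   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
++  // the current thread (xthread) is passed as the implicit first C argument and
++  // pending exceptions are checked on return unless check_exceptions is false.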
++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result(Register oop_result, Register java_thread); ++ void get_vm_result_2(Register metadata_result, Register java_thread); ++ ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_0, Register arg_1, Register arg_2); ++ ++ // These always tightly bind to MacroAssembler::call_VM_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point, Register arg_0); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); ++ void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp); ++ ++ // thread in the default location (xthread) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label* retaddr = NULL ++ ); ++ ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label& retaddr) { ++ call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); ++ } ++ ++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop 
after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions); ++ ++ virtual void check_and_handle_earlyret(Register java_thread); ++ virtual void check_and_handle_popframe(Register java_thread); ++ ++ void resolve_oop_handle(Register result, Register tmp = x15); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ void mov_metadata(Register dst, Metadata* obj); ++ void bang_stack_size(Register size, Register tmp); ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_klass(Register dst, Klass* k); ++ ++ void load_mirror(Register dst, Register method, Register tmp = x15); ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, ++ Address src, Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, ++ Register src, Register tmp1, Register tmp2, Register tmp3); ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase); ++ void decode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop(Register d, Register s); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop(Register d, Register s); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); }; ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ // currently unimplemented ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ // This dummy is to prevent a call to store_heap_oop from ++ // converting a zero (linke NULL) into a Register by giving ++ // the compiler two choices it can't resolve ++ ++ void store_heap_oop(Address dst, void* dummy); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generateion is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). 
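++  // For example, a field access at a small offset can rely on the hardware trap
++  // taken when dereferencing a NULL base, whereas an access whose offset lies
++  // beyond the protected page needs the explicit check emitted by null_check().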
++ ++ virtual void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // idiv variant which deals with MINLONG as dividend and -1 as divisor ++ int corrected_idivl(Register result, Register rs1, Register rs2, ++ bool want_remainder); ++ int corrected_idivq(Register result, Register rs1, Register rs2, ++ bool want_remainder); ++ ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_tmp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ // n.n. x86 allows RegisterOrConstant for vtable_index ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Label& slow_case, // continuation point if fast allocation fails ++ bool is_far = false ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register tmp1, // temp register ++ Register tmp2, // temp register ++ Label& slow_case, // continuation point of fast allocation fails ++ bool is_far = false ++ ); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except tmp_reg ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ Register super_check_offset = noreg); ++ ++ // The reset of the type cehck; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The tmp_reg and tmp2_reg can be noreg, if no tmps are avaliable. ++ // Updates the sub's secondary super cache as necessary. 
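++  // The combined check_klass_subtype() below is essentially this pairing (sketch):
++  //   Label L_failure;
++  //   check_klass_subtype_fast_path(sub, super, tmp, &L_success, &L_failure, NULL);
++  //   check_klass_subtype_slow_path(sub, super, tmp, noreg, &L_success, NULL);
++  //   bind(L_failure);   // not a subtype: fall through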
++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Register tmp2_reg, ++ Label* L_success, ++ Label* L_failure); ++ ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register tmp_reg, ++ Label& L_success); ++ ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char* s = "broken oop addr"); ++ ++ void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {} ++ void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {} ++ ++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // A more convenient access to fence for our purposes ++ // We used four bit to indicate the read and write bits in the predecessors and successors, ++ // and extended i for r, o for w if UseConservativeFence enabled. ++ enum Membar_mask_bits { ++ StoreStore = 0b0101, // (pred = ow + succ = ow) ++ LoadStore = 0b1001, // (pred = ir + succ = ow) ++ StoreLoad = 0b0110, // (pred = ow + succ = ir) ++ LoadLoad = 0b1010, // (pred = ir + succ = ir) ++ AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw) ++ }; ++ ++ void membar(uint32_t order_constraint); ++ ++ static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) { ++ predecessor = (order_constraint >> 2) & 0x3; ++ successor = order_constraint & 0x3; ++ ++ // extend rw -> iorw: ++ // 01(w) -> 0101(ow) ++ // 10(r) -> 1010(ir) ++ // 11(rw)-> 1111(iorw) ++ if (UseConservativeFence) { ++ predecessor |= predecessor << 2; ++ successor |= successor << 2; ++ } ++ } ++ ++ static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) { ++ return ((predecessor & 0x3) << 2) | (successor & 0x3); ++ } ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ return RegisterOrConstant(tmp); ++ } ++ ++ static address target_addr_for_insn(address insn_addr); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
++ static int pd_patch_instruction_size(address branch, address target) ; ++ void pd_patch_instruction(address branch, address target) { ++ pd_patch_instruction_size(branch, target); ++ } ++ static address pd_call_destination(address branch) { ++ return target_addr_for_insn(branch); ++ } ++ ++ static int patch_oop(address insn_addr, address o); ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); ++ ++ // The following 4 methods return the offset of the appropriate move instruction ++ ++ // Support for fast byte/short loading with zero extension (depending on particular CPU) ++ int load_unsigned_byte(Register dst, Address src); ++ int load_unsigned_short(Register dst, Address src); ++ ++ // Support for fast byte/short loading with sign extension (depending on particular CPU) ++ int load_signed_byte(Register dst, Address src); ++ int load_signed_short(Register dst, Address src); ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ public: ++ // enum used for riscv--x86 linkage to define return type of x86 function ++ enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; ++ ++ // Standard pseudoinstruction ++ void nop(); ++ void mv(Register Rd, Register Rs) ; ++ void notr(Register Rd, Register Rs); ++ void neg(Register Rd, Register Rs); ++ void negw(Register Rd, Register Rs); ++ void sext_w(Register Rd, Register Rs); ++ void zext_b(Register Rd, Register Rs); ++ void seqz(Register Rd, Register Rs); // set if = zero ++ void snez(Register Rd, Register Rs); // set if != zero ++ void sltz(Register Rd, Register Rs); // set if < zero ++ void sgtz(Register Rd, Register Rs); // set if > zero ++ ++ // Float pseudoinstruction ++ void fmv_s(FloatRegister Rd, FloatRegister Rs); ++ void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value ++ void fneg_s(FloatRegister Rd, FloatRegister Rs); ++ ++ // Double pseudoinstruction ++ void fmv_d(FloatRegister Rd, FloatRegister Rs); ++ void fabs_d(FloatRegister Rd, FloatRegister Rs); ++ void fneg_d(FloatRegister Rd, FloatRegister Rs); ++ ++ // Pseudoinstruction for control and status register ++ void rdinstret(Register Rd); // read instruction-retired counter ++ void rdcycle(Register Rd); // read cycle counter ++ void rdtime(Register Rd); // read time ++ void csrr(Register Rd, unsigned csr); // read csr ++ void csrw(unsigned csr, Register Rs); // write csr ++ void csrs(unsigned csr, Register Rs); // set bits in csr ++ void csrc(unsigned csr, Register Rs); // clear bits in csr ++ void csrwi(unsigned csr, unsigned imm); ++ void csrsi(unsigned csr, unsigned imm); ++ void csrci(unsigned csr, unsigned imm); ++ void frcsr(Register Rd); // read float-point csr ++ void fscsr(Register Rd, Register Rs); // swap float-point csr ++ void fscsr(Register Rs); // write float-point csr ++ void frrm(Register Rd); // read float-point rounding mode ++ void fsrm(Register Rd, Register Rs); // swap float-point rounding mode ++ void fsrm(Register Rs); // write float-point rounding mode ++ void fsrmi(Register Rd, unsigned imm); ++ void fsrmi(unsigned imm); ++ void frflags(Register Rd); // read float-point exception flags ++ void fsflags(Register Rd, Register Rs); // swap float-point exception flags ++ void fsflags(Register Rs); // write float-point 
exception flags ++ void fsflagsi(Register Rd, unsigned imm); ++ void fsflagsi(unsigned imm); ++ ++ void beqz(Register Rs, const address &dest); ++ void blez(Register Rs, const address &dest); ++ void bgez(Register Rs, const address &dest); ++ void bltz(Register Rs, const address &dest); ++ void bgtz(Register Rs, const address &dest); ++ void bnez(Register Rs, const address &dest); ++ void la(Register Rd, Label &label); ++ void la(Register Rd, const address &dest); ++ void la(Register Rd, const Address &adr); ++ //label ++ void beqz(Register Rs, Label &l, bool is_far = false); ++ void bnez(Register Rs, Label &l, bool is_far = false); ++ void blez(Register Rs, Label &l, bool is_far = false); ++ void bgez(Register Rs, Label &l, bool is_far = false); ++ void bltz(Register Rs, Label &l, bool is_far = false); ++ void bgtz(Register Rs, Label &l, bool is_far = false); ++ void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); ++ ++ void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } } ++ void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } } ++ void push_reg(Register Rs); ++ void pop_reg(Register Rd); ++ int push_reg(unsigned int bitset, Register stack); ++ int pop_reg(unsigned int bitset, Register stack); ++ static RegSet call_clobbered_registers(); ++ void push_call_clobbered_registers(); ++ void pop_call_clobbered_registers(); ++ void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0); ++ void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ void bind(Label& L) { ++ Assembler::bind(L); ++ // fences across basic blocks should not be merged ++ code()->clear_last_insn(); ++ } ++ ++ // mv ++ void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); } ++ ++ inline void mv(Register Rd, int imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, long long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned int imm64) { li(Rd, (int64_t)imm64); } ++ inline void 
mv(Register Rd, unsigned long imm64) { li(Rd, (int64_t)imm64); } ++ inline void mv(Register Rd, unsigned long long imm64) { li(Rd, (int64_t)imm64); } ++ ++ inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); } ++ ++ void mv(Register Rd, Address dest); ++ void mv(Register Rd, RegisterOrConstant src); ++ ++ // logic ++ void andrw(Register Rd, Register Rs1, Register Rs2); ++ void orrw(Register Rd, Register Rs1, Register Rs2); ++ void xorrw(Register Rd, Register Rs1, Register Rs2); ++ ++ // vext ++ void vmnot_m(VectorRegister vd, VectorRegister vs); ++ void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked); ++ void vfneg_v(VectorRegister vd, VectorRegister vs); ++ ++ // support for argument shuffling ++ void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0); ++ void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0); ++ void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0); ++ void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0); ++ void object_move(OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset); ++ ++ void rt_call(address dest, Register tmp = t0); ++ ++ // revb ++ void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend ++ void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend ++ void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend ++ void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend ++ void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower ++ void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword ++ void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word ++ void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword ++ ++ void andi(Register Rd, Register Rn, int64_t increment, Register tmp = t0); ++ void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp1, Register tmp2); ++ ++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail) ; ++ void cmpxchg(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool = false); ++ void cmpxchg_weak(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result); ++ void cmpxchg_narrow_value_helper(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Register tmp1, Register tmp2, Register tmp3); ++ void cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, bool result_as_bool, ++ 
Register tmp1, Register tmp2, Register tmp3); ++ void weak_cmpxchg_narrow_value(Register addr, Register expected, ++ Register new_val, ++ enum operand_size size, ++ Assembler::Aqrl acquire, Assembler::Aqrl release, ++ Register result, ++ Register tmp1, Register tmp2, Register tmp3); ++ ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++ ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ void atomic_xchgwu(Register prev, Register newv, Register addr); ++ void atomic_xchgalwu(Register prev, Register newv, Register addr); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be t0 or t1 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL, ++ Register flag = noreg); ++ void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg); ++ ++ static bool far_branches() { ++ return ReservedCodeCacheSize > branch_range; ++ } ++ ++ //atomic ++ void atomic_incw(Register counter_addr, Register tmp1); ++ void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) { ++ la(tmp1, counter_addr); ++ atomic_incw(tmp1, tmp2); ++ } ++ ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. ++ void far_call(Address entry, Register tmp = t0); ++ void far_jump(Address entry, Register tmp = t0); ++ ++ static int far_branch_size() { ++ if (far_branches()) { ++ return 2 * 4; // auipc + jalr, see far_call() & far_jump() ++ } else { ++ return 4; ++ } ++ } ++ ++ void load_byte_map_base(Register reg); ++ ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ sub(t1, sp, offset); ++ sd(zr, Address(t1)); ++ } ++ ++ void la_patchable(Register reg1, const Address &dest, int32_t &offset); ++ ++ virtual void _call_Unimplemented(address call_site) { ++ mv(t1, call_site); ++ } ++ #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) ++ ++#ifdef COMPILER2 ++ void spill(Register Rx, bool is64, int offset) { ++ is64 ? sd(Rx, Address(sp, offset)) ++ : sw(Rx, Address(sp, offset)); ++ } ++ ++ void spill(FloatRegister Rx, bool is64, int offset) { ++ is64 ? 
fsd(Rx, Address(sp, offset)) ++ : fsw(Rx, Address(sp, offset)); ++ } ++ ++ void spill(VectorRegister Vx, int offset) { ++ add(t0, sp, offset); ++ vs1r_v(Vx, t0); ++ } ++ ++ void unspill(Register Rx, bool is64, int offset) { ++ is64 ? ld(Rx, Address(sp, offset)) ++ : lw(Rx, Address(sp, offset)); ++ } ++ ++ void unspillu(Register Rx, bool is64, int offset) { ++ is64 ? ld(Rx, Address(sp, offset)) ++ : lwu(Rx, Address(sp, offset)); ++ } ++ ++ void unspill(FloatRegister Rx, bool is64, int offset) { ++ is64 ? fld(Rx, Address(sp, offset)) ++ : flw(Rx, Address(sp, offset)); ++ } ++ ++ void unspill(VectorRegister Vx, int offset) { ++ add(t0, sp, offset); ++ vl1r_v(Vx, t0); ++ } ++ ++ void spill_copy_vector_stack_to_stack(int src_offset, int dst_offset, ++ int vec_reg_size_in_bytes) { ++ assert(vec_reg_size_in_bytes % 16 == 0, "unexpected vector reg size"); ++ unspill(v0, src_offset); ++ spill(v0, dst_offset); ++ } ++ ++#endif // COMPILER2 ++ ++ // Frame creation and destruction shared between JITs. ++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++ void reserved_stack_check(); ++ void get_polling_page(Register dest, address page, int32_t &offset, relocInfo::relocType rtype); ++ void read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); ++ // Return: the call PC ++ address trampoline_call(Address entry); ++ address ic_call(address entry, jint method_index = 0); ++ // Support for memory inc/dec ++ // n.b. increment/decrement calls with an Address destination will ++ // need to use a scratch register to load the value to be ++ // incremented. increment/decrement calls which add or subtract a ++ // constant value other than sign-extended 12-bit immediate will need ++ // to use a 2nd scratch register to hold the constant. so, an address ++ // increment/decrement may trash both t0 and t1. 
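++  // Consequently the Address passed in should not be formed from t0 or t1,
++  // as both scratch registers may already be clobbered before the store-back.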
++ ++ void increment(const Address dst, int64_t value = 1); ++ void incrementw(const Address dst, int32_t value = 1); ++ ++ void decrement(const Address dst, int64_t value = 1); ++ void decrementw(const Address dst, int32_t value = 1); ++ void cmpptr(Register src1, Address src2, Label& equal); ++ void oop_equal(Register obj1, Register obj2, Label& equal, bool is_far = false); // cmpoop ++ void oop_nequal(Register obj1, Register obj2, Label& nequal, bool is_far = false); ++ void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); ++#ifdef COMPILER2 ++ void minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min); ++ ++ address arrays_equals(Register a1, Register a2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register result, Register cnt1, int elem_size); ++ ++ void string_equals(Register a1, Register a2, Register result, Register cnt1, ++ int elem_size); ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, Register tmp3, int ae); ++ void string_indexof_char_short(Register str1, Register cnt1, ++ Register ch, Register result, ++ bool isL); ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ bool isL); ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ Register result, int ae); ++ void string_indexof_linearscan(Register haystack, Register needle, ++ Register haystack_len, Register needle_len, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ int needle_con_cnt, Register result, int ae); ++ void compute_index(Register str1, Register trailing_zero, Register match_mask, ++ Register result, Register char_tmp, Register tmp, ++ bool haystack_isL); ++ void compute_match_mask(Register src, Register pattern, Register match_mask, ++ Register mask1, Register mask2); ++ void cad(Register dst, Register src1, Register src2, Register carry); ++ void cadc(Register dst, Register src1, Register src2, Register carry); ++ void adc(Register dst, Register src1, Register src2, Register carry); ++ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2, Register carry = t0); ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k, Register tmp1, Register tmp2); ++ void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi); ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi); ++#endif // COMPILER2 ++ void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); 
++ void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
++
++ void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
++ void zero_words(Register base, uint64_t cnt);
++ address zero_words(Register ptr, Register cnt);
++ void fill_words(Register base, Register cnt, Register value);
++ void zero_memory(Register addr, Register len, Register tmp1);
++
++ // shift left by shamt and add
++ void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
++
++#ifdef COMPILER2
++ // refer to conditional_branches and float_conditional_branches
++ static const int bool_test_bits = 3;
++ static const int neg_cond_bits = 2;
++ static const int unsigned_branch_mask = 1 << bool_test_bits;
++ static const int double_branch_mask = 1 << bool_test_bits;
++
++ void enc_cmove(int cmpFlag, Register op1, Register op2, Register dst, Register src);
++
++ // cmp
++ void cmp_branch(int cmpFlag, Register op1, Register op2, Label& label, bool is_far = false);
++ void float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far = false);
++
++ void enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false);
++ void enc_cmpEqNe_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far = false);
++
++ // intrinsic methods implemented by vector instructions
++ void string_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size);
++ void arrays_equals_v(Register a1, Register a2, Register result, Register cnt1, int elem_size);
++ void string_compare_v(Register str1, Register str2, Register cnt1, Register cnt2,
++ Register result, Register tmp1, Register tmp2, int encForm);
++
++ void clear_array_v(Register base, Register cnt);
++ address byte_array_inflate_v(Register src, Register dst, Register len, Register tmp);
++ void char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp);
++ void encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp);
++
++ address has_negatives_v(Register ary, Register len, Register result, Register tmp);
++#endif
++
++ // Float conversion helpers with a "_safe" suffix. A plain conversion from
++ // NaN, +Inf or -Inf in a float/double source to an integer type needs care
++ // to match Java semantics (Java converts NaN to 0 and saturates
++ // out-of-range values), so these variants handle such inputs explicitly to
++ // get correct results.
++ void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0); ++ ++ // vector load/store unit-stride instructions ++ void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vle64_v(vd, base, vm); ++ break; ++ case Assembler::e32: ++ vle32_v(vd, base, vm); ++ break; ++ case Assembler::e16: ++ vle16_v(vd, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vle8_v(vd, base, vm); ++ break; ++ } ++ } ++ ++ void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) { ++ switch (sew) { ++ case Assembler::e64: ++ vse64_v(store_data, base, vm); ++ break; ++ case Assembler::e32: ++ vse32_v(store_data, base, vm); ++ break; ++ case Assembler::e16: ++ vse16_v(store_data, base, vm); ++ break; ++ case Assembler::e8: // fall through ++ default: ++ vse8_v(store_data, base, vm); ++ break; ++ } ++ } ++ ++ static const int zero_words_block_size; ++ ++ void cast_primitive_type(BasicType type, Register Rt) { ++ switch (type) { ++ case T_BOOLEAN: ++ sltu(Rt, zr, Rt); ++ break; ++ case T_CHAR : ++ zero_extend(Rt, Rt, 16); ++ break; ++ case T_BYTE : ++ sign_extend(Rt, Rt, 8); ++ break; ++ case T_SHORT : ++ sign_extend(Rt, Rt, 16); ++ break; ++ case T_INT : ++ addw(Rt, Rt, zr); ++ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // float cmp with unordered_result ++ void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++ void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result); ++ ++ // Zero/Sign-extend ++ void zero_extend(Register dst, Register src, int bits); ++ void sign_extend(Register dst, Register src, int bits); ++ ++ // compare src1 and src2 and get -1/0/1 in dst. ++ // if [src1 > src2], dst = 1; ++ // if [src1 == src2], dst = 0; ++ // if [src1 < src2], dst = -1; ++ void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0); ++ ++ void load_constant_pool_cache(Register cpool, Register method); ++ ++ void load_max_stack(Register dst, Register method); ++ ++private: ++ void load_prototype_header(Register dst, Register src); ++ void repne_scan(Register addr, Register value, Register count, Register tmp); ++ ++#ifdef ASSERT ++ // Macro short-hand support to clean-up after a failed call to trampoline ++ // call generation (see trampoline_call() below), when a set of Labels must ++ // be reset (before returning). ++#define reset_labels1(L1) L1.reset() ++#define reset_labels2(L1, L2) L1.reset(); L2.reset() ++#define reset_labels3(L1, L2, L3) L1.reset(); reset_labels2(L2, L3) ++#define reset_labels5(L1, L2, L3, L4, L5) reset_labels2(L1, L2); reset_labels3(L3, L4, L5) ++#endif ++ ++ // Return true if an address is within the 48-bit RISCV64 address space. 
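++ // Note: the check below passes only for addresses whose bits 63..47 are all
++ // zero, i.e. the lower (user-space) half of the Sv48 address range.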
++ bool is_valid_riscv64_address(address addr) { ++ // sv48: must have bits 63-48 all equal to bit 47 ++ return ((uintptr_t)addr >> 47) == 0; ++ } ++ ++ void ld_constant(Register dest, const Address &const_addr) { ++ if (NearCpool) { ++ ld(dest, const_addr); ++ } else { ++ int32_t offset = 0; ++ la_patchable(dest, InternalAddress(const_addr.target()), offset); ++ ld(dest, Address(dest, offset)); ++ } ++ } ++ ++ int bitset_to_regs(unsigned int bitset, unsigned char* regs); ++ Address add_memory_helper(const Address dst); ++ ++ void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); ++ void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); ++ ++#ifdef COMPILER2 ++ void element_compare(Register a1, Register a2, Register result, Register cnt, Register tmp1, Register tmp2, ++ VectorRegister vr1, VectorRegister vr2, VectorRegister vrs, bool islatin, Label &DONE); ++#endif // COMPILER2 ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } ++#endif ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +new file mode 100644 +index 000000000..fc2b191c0 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.inline.hpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++#define CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP ++ ++#endif // CPU_RISCV_MACROASSEMBLER_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.cpp b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +new file mode 100644 +index 000000000..d049193d4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.cpp +@@ -0,0 +1,440 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++ ++#define __ _masm-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) { ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ } ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //PRODUCT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ Register temp = t1; ++ Register temp2 = t0; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ beqz(obj, L_bad); ++ __ push_reg(RegSet::of(temp, temp2), sp); ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ intptr_t super_check_offset = klass->super_check_offset(); ++ __ ld(temp, Address(temp, 
super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr), L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ __ bind(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ __ pop_reg(RegSet::of(temp, temp2), sp); ++ BLOCK_COMMENT("} verify_klass"); ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == xmethod, "interpreter calling convention"); ++ Label L_no_such_method; ++ __ beqz(xmethod, L_no_such_method); ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ ++ __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset())); ++ __ beqz(t0, run_compiled_code); ++ __ ld(t0, Address(method, Method::interpreter_entry_offset())); ++ __ jr(t0); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(t0,Address(method, entry_offset)); ++ __ jr(t0); ++ __ bind(L_no_such_method); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
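++ // The loads below walk MH.form -> LambdaForm.vmentry (a MemberName)
++ // -> MemberName.method -> ResolvedMethodName.vmtarget, which yields the
++ // Method* that is finally invoked.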
++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == xmethod, "required register for loading method"); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ Label L; ++ __ ld(t0, __ argument_address(temp2, -1)); ++ __ oop_equal(recv, t0, L); ++ __ ld(x10, __ argument_address(temp2, -1)); ++ __ ebreak(); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. ++ __ ebreak(); // empty stubs make SG sick ++ return NULL; ++ } ++ ++ // x30: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // xmethod: Method* ++ // x13: argument locator (parameter slot count, added to sp) ++ // x11: used as temp to hold mh or receiver ++ Register argp = x13; // argument list ptr, live on error paths ++ Register mh = x11; // MH receiver; dies quickly and is recycled ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(t0, Address(xmethod, Method::intrinsic_id_offset_in_bytes())); ++ __ mv(t1, (int) iid); ++ __ beq(t0, t1, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ ebreak(); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. 
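++ // The argument count is read from ConstMethod::_size_of_parameters of the
++ // current method; x13_first_arg_addr is then used below to pick up the
++ // MethodHandle or receiver from the interpreter stack.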
++ Address x13_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(argp, Address(xmethod, Method::const_offset())); ++ __ load_sized_value(argp, ++ Address(argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ x13_first_arg_addr = __ argument_address(argp, -1); ++ } else { ++ DEBUG_ONLY(argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(mh, x13_first_arg_addr); ++ DEBUG_ONLY(argp = noreg); ++ } ++ ++ // x13_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. ++ __ ld(recv = x12, x13_first_arg_addr); ++ } ++ DEBUG_ONLY(argp = noreg); ++ Register xmember = xmethod; // MemberName ptr; incoming method ptr is dead now ++ __ pop_reg(xmember); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, recv, xmember, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register temp1 = x7; ++ Register temp2 = x28; ++ Register temp3 = x29; // x30 is live by this point: it contains the sender SP ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7); ++ } ++ ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, xmethod, temp1, for_compiler_entry); ++ } else { ++ // The method is a member invoker used by direct method handles. 
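++ // The trailing MemberName (member_reg) carries everything needed to select
++ // the target: clazz for the receiver check and interface lookup, vmindex
++ // for virtual/interface dispatch, and method/vmtarget for the linkToSpecial
++ // and linkToStatic cases handled below.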
++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( xmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ ebreak(); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // x30 - interpreter linkage (if interpreted) ++ // x11 ... 
x10 - compiler arguments (if compiled) ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(xmethod, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, xmethod, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ bgez(temp2_index, L_index_ok); ++ __ ebreak(); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. ++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, xmethod); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rindex = xmethod; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ bgez(rindex, L); ++ __ ebreak(); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rindex, xmethod, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // live at this point: xmethod, x30 (if interpreted) ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r2_recv be shifted out. ++ __ verify_method_ptr(xmethod); ++ jump_from_method_handle(_masm, xmethod, temp1, for_compiler_entry); ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ } ++ } ++ ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { } ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. 
++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/riscv/methodHandles_riscv.hpp b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +new file mode 100644 +index 000000000..8ed69efe8 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/methodHandles_riscv.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 120000) ++}; ++ ++public: ++ ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +new file mode 100644 +index 000000000..4b1573130 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp +@@ -0,0 +1,404 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++ ++Register NativeInstruction::extract_rs1(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 19, 15)); ++} ++ ++Register NativeInstruction::extract_rs2(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 24, 20)); ++} ++ ++Register NativeInstruction::extract_rd(address instr) { ++ assert_cond(instr != NULL); ++ return as_Register(Assembler::extract(((unsigned*)instr)[0], 11, 7)); ++} ++ ++uint32_t NativeInstruction::extract_opcode(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 6, 0); ++} ++ ++uint32_t NativeInstruction::extract_funct3(address instr) { ++ assert_cond(instr != NULL); ++ return Assembler::extract(((unsigned*)instr)[0], 14, 12); ++} ++ ++bool NativeInstruction::is_pc_relative_at(address instr) { ++ // auipc + jalr ++ // auipc + addi ++ // auipc + load ++ // auipc + fload_load ++ return (is_auipc_at(instr)) && ++ (is_addi_at(instr + instruction_size) || ++ is_jalr_at(instr + instruction_size) || ++ is_load_at(instr + instruction_size) || ++ is_float_load_at(instr + instruction_size)) && ++ check_pc_relative_data_dependency(instr); ++} ++ ++// ie:ld(Rd, Label) ++bool NativeInstruction::is_load_pc_relative_at(address instr) { ++ return is_auipc_at(instr) && // auipc ++ is_ld_at(instr + instruction_size) && // ld ++ check_load_pc_relative_data_dependency(instr); ++} ++ ++bool NativeInstruction::is_movptr_at(address instr) { ++ return is_lui_at(instr) && // Lui ++ is_addi_at(instr + instruction_size) && // Addi ++ is_slli_shift_at(instr + instruction_size * 2, 11) && // Slli Rd, Rs, 11 ++ is_addi_at(instr + instruction_size * 3) && // Addi ++ is_slli_shift_at(instr + instruction_size * 4, 6) && // Slli Rd, Rs, 6 ++ (is_addi_at(instr + instruction_size * 5) || ++ is_jalr_at(instr + instruction_size * 5) || ++ is_load_at(instr + instruction_size * 5)) && // Addi/Jalr/Load ++ check_movptr_data_dependency(instr); ++} ++ ++bool NativeInstruction::is_li32_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addiw_at(instr + 
instruction_size) && // addiw ++ check_li32_data_dependency(instr); ++} ++ ++bool NativeInstruction::is_li64_at(address instr) { ++ return is_lui_at(instr) && // lui ++ is_addi_at(instr + instruction_size) && // addi ++ is_slli_shift_at(instr + instruction_size * 2, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 3) && // addi ++ is_slli_shift_at(instr + instruction_size * 4, 12) && // Slli Rd, Rs, 12 ++ is_addi_at(instr + instruction_size * 5) && // addi ++ is_slli_shift_at(instr + instruction_size * 6, 8) && // Slli Rd, Rs, 8 ++ is_addi_at(instr + instruction_size * 7) && // addi ++ check_li64_data_dependency(instr); ++} ++ ++void NativeCall::verify() { ++ assert(NativeCall::is_call_at((address)this), "unexpected code at call site"); ++} ++ ++address NativeCall::destination() const { ++ address addr = (address)this; ++ assert(NativeInstruction::is_jal_at(instruction_address()), "inst must be jal."); ++ address destination = MacroAssembler::target_addr_for_insn(instruction_address()); ++ ++ // Do we use a trampoline stub for this call? ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ if (nm != NULL && nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ ++ // Patch the constant in the call's trampoline stub. ++ address trampoline_stub_addr = get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ assert (!is_NativeCallTrampolineStub_at(dest), "chained trampolines"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++ ++ // Patch the call. 
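++ // If the new destination is still within the +-1 MiB range of a single jal,
++ // patch the jal directly; otherwise point the jal at the trampoline stub,
++ // whose 64-bit target was updated above.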
++ if (Assembler::reachable_from_branch_at(addr_call, dest)) { ++ set_destination(dest); ++ } else { ++ assert (trampoline_stub_addr != NULL, "we need a trampoline"); ++ set_destination(trampoline_stub_addr); ++ } ++ ++ ICache::invalidate_range(addr_call, instruction_size); ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ address jal_destination = MacroAssembler::pd_call_destination(call_addr); ++ if (code != NULL && code->contains(jal_destination) && is_NativeCallTrampolineStub_at(jal_destination)) { ++ return jal_destination; ++ } ++ ++ if (code != NULL && code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ ++ return NULL; ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { Unimplemented(); } ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ if (!(nativeInstruction_at(instruction_address())->is_movptr() || ++ is_auipc_at(instruction_address()))) { ++ fatal("should be MOVPTR or AUIPC"); ++ } ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ if (maybe_cpool_ref(instruction_address())) { ++ return *(intptr_t*)addr; ++ } else { ++ return (intptr_t)addr; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (maybe_cpool_ref(instruction_address())) { ++ address addr = MacroAssembler::target_addr_for_insn(instruction_address()); ++ *(intptr_t*)addr = x; ++ } else { ++ // Store x into the instruction stream. ++ MacroAssembler::pd_patch_instruction_size(instruction_address(), (address)x); ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. 
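++ // This keeps the nmethod's oop/metadata tables consistent with the value
++ // just patched into the code, so the GC continues to see and update the
++ // embedded pointer.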
++ CodeBlob* cb = CodeCache::find_blob(instruction_address()); ++ if(cb != NULL) { ++ nmethod* nm = cb->as_nmethod_or_null(); ++ if (nm != NULL) { ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(x); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)x; ++ break; ++ } ++ } ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const { ++ Unimplemented(); ++ return 0; ++} ++ ++void NativeMovRegMem::set_offset(int x) { Unimplemented(); } ++ ++void NativeMovRegMem::verify() { ++ Unimplemented(); ++} ++ ++//-------------------------------------------------------------------------------- ++ ++void NativeJump::verify() { } ++ ++ ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++} ++ ++ ++address NativeJump::jump_destination() const { ++ address dest = MacroAssembler::target_addr_for_insn(instruction_address()); ++ ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++}; ++ ++//------------------------------------------------------------------- ++ ++address NativeGeneralJump::jump_destination() const { ++ NativeMovConstReg* move = nativeMovConstReg_at(instruction_address()); ++ address dest = (address) move->data(); ++ ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++} ++ ++//------------------------------------------------------------------- ++ ++bool NativeInstruction::is_safepoint_poll() { ++ return is_lwu_to_zr(address(this)); ++} ++ ++bool NativeInstruction::is_lwu_to_zr(address instr) { ++ return (extract_opcode(instr) == 0b0000011 && ++ extract_funct3(instr) == 0b110 && ++ extract_rd(instr) == zr); // zr ++} ++ ++// A 16-bit instruction with all bits ones is permanently reserved as an illegal instruction. ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ // jvmci ++ return uint_at(0) == 0xffffffff; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ assert_cond(code_pos != NULL); ++ *(juint*)code_pos = 0xffffffff; // all bits ones is permanently reserved as an illegal instruction ++} ++ ++//------------------------------------------------------------------- ++ ++// MT-safe inserting of a jump over a jump or a nop (used by ++// nmethod::make_not_entrant_or_zombie) ++ ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ ++ assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() || ++ nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), ++ "riscv cannot replace non-jump with jump"); ++ ++ // Patch this nmethod atomically. 
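++ // The patch is a single aligned 4-byte store: either a jal to the handler
++ // (when it is within the +-1 MiB jal range) or an illegal instruction,
++ // which takes the SIGILL not_entrant/zombie path instead.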
++ if (Assembler::reachable_from_branch_at(verified_entry, dest)) { ++ ptrdiff_t offset = dest - verified_entry; ++ guarantee(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction."); // 1M ++ ++ uint32_t insn = 0; ++ address pInsn = (address)&insn; ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, 0); // zero, no link jump ++ Assembler::patch(pInsn, 6, 0, 0b1101111); // j, (jal x0 offset) ++ *(unsigned int*)verified_entry = insn; ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie. ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, instruction_size); ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler a(&cb); ++ ++ int32_t offset = 0; ++ a.movptr_with_offset(t0, entry, offset); // lui, addi, slli, addi, slli ++ a.jalr(x0, t0, offset); // jalr ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++// MT-safe patching of a long jump instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ ShouldNotCallThis(); ++} ++ ++ ++address NativeCallTrampolineStub::destination(nmethod *nm) const { ++ return ptr_at(data_offset); ++} ++ ++void NativeCallTrampolineStub::set_destination(address new_destination) { ++ set_ptr_at(data_offset, new_destination); ++ OrderAccess::release(); ++} ++ ++uint32_t NativeMembar::get_kind() { ++ uint32_t insn = uint_at(0); ++ ++ uint32_t predecessor = Assembler::extract(insn, 27, 24); ++ uint32_t successor = Assembler::extract(insn, 23, 20); ++ ++ return MacroAssembler::pred_succ_to_membar_mask(predecessor, successor); ++} ++ ++void NativeMembar::set_kind(uint32_t order_kind) { ++ uint32_t predecessor = 0; ++ uint32_t successor = 0; ++ ++ MacroAssembler::membar_mask_to_pred_succ(order_kind, predecessor, successor); ++ ++ uint32_t insn = uint_at(0); ++ address pInsn = (address) &insn; ++ Assembler::patch(pInsn, 27, 24, predecessor); ++ Assembler::patch(pInsn, 23, 20, successor); ++ ++ address membar = addr_at(0); ++ *(unsigned int*) membar = insn; ++} +diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +new file mode 100644 +index 000000000..e8a4e0a46 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +@@ -0,0 +1,561 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_NATIVEINST_RISCV_HPP ++#define CPU_RISCV_NATIVEINST_RISCV_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovRegMem ++// - - NativeJump ++// - - NativeGeneralJump ++// - - NativeIllegalInstruction ++// - - NativeCallTrampolineStub ++// - - NativeMembar ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ friend bool is_NativeCallTrampolineStub_at(address); ++ public: ++ enum { ++ instruction_size = 4 ++ }; ++ ++ juint encoding() const { ++ return uint_at(0); ++ } ++ ++ bool is_jal() const { return is_jal_at(addr_at(0)); } ++ bool is_movptr() const { return is_movptr_at(addr_at(0)); } ++ bool is_call() const { return is_call_at(addr_at(0)); } ++ bool is_jump() const { return is_jump_at(addr_at(0)); } ++ ++ static bool is_jal_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1101111; } ++ static bool is_jalr_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; } ++ static bool is_branch_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b1100011; } ++ static bool is_ld_at(address instr) { assert_cond(instr != NULL); return is_load_at(instr) && extract_funct3(instr) == 0b011; } ++ static bool is_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000011; } ++ static bool is_float_load_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0000111; } ++ static bool is_auipc_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010111; } ++ static bool is_jump_at(address instr) { assert_cond(instr != NULL); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); } ++ static bool is_addi_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; } ++ static bool is_addiw_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; } ++ static bool is_lui_at(address instr) { assert_cond(instr != NULL); return extract_opcode(instr) == 0b0110111; } ++ static bool is_slli_shift_at(address instr, uint32_t shift) { ++ assert_cond(instr != NULL); ++ return (extract_opcode(instr) == 0b0010011 && // opcode field ++ extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation ++ Assembler::extract(((unsigned*)instr)[0], 25, 20) == shift); // shamt field ++ } ++ ++ static Register extract_rs1(address instr); ++ static Register extract_rs2(address instr); ++ static Register extract_rd(address instr); ++ static uint32_t extract_opcode(address instr); ++ static uint32_t extract_funct3(address instr); ++ ++ // the instruction sequence of movptr is as 
below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi/jalr/load ++ static bool check_movptr_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address last_instr = slli2 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(last_instr) == extract_rd(slli2); ++ } ++ ++ // the instruction sequence of li64 is as below: ++ // lui ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ // slli ++ // addi ++ static bool check_li64_data_dependency(address instr) { ++ address lui = instr; ++ address addi1 = lui + instruction_size; ++ address slli1 = addi1 + instruction_size; ++ address addi2 = slli1 + instruction_size; ++ address slli2 = addi2 + instruction_size; ++ address addi3 = slli2 + instruction_size; ++ address slli3 = addi3 + instruction_size; ++ address addi4 = slli3 + instruction_size; ++ return extract_rs1(addi1) == extract_rd(lui) && ++ extract_rs1(addi1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(addi1) && ++ extract_rs1(slli1) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(slli1) && ++ extract_rs1(addi2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(addi2) && ++ extract_rs1(slli2) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(slli2) && ++ extract_rs1(addi3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(addi3) && ++ extract_rs1(slli3) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(slli3) && ++ extract_rs1(addi4) == extract_rd(addi4); ++ } ++ ++ // the instruction sequence of li32 is as below: ++ // lui ++ // addiw ++ static bool check_li32_data_dependency(address instr) { ++ address lui = instr; ++ address addiw = lui + instruction_size; ++ ++ return extract_rs1(addiw) == extract_rd(lui) && ++ extract_rs1(addiw) == extract_rd(addiw); ++ } ++ ++ // the instruction sequence of pc-relative is as below: ++ // auipc ++ // jalr/addi/load/float_load ++ static bool check_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; ++ ++ return extract_rs1(last_instr) == extract_rd(auipc); ++ } ++ ++ // the instruction sequence of load_label is as below: ++ // auipc ++ // load ++ static bool check_load_pc_relative_data_dependency(address instr) { ++ address auipc = instr; ++ address last_instr = auipc + instruction_size; ++ ++ return extract_rs1(last_instr) == extract_rd(auipc); ++ } ++ ++ static bool is_movptr_at(address instr); ++ static bool is_li32_at(address instr); ++ static bool is_li64_at(address instr); ++ static bool is_pc_relative_at(address branch); ++ static bool is_load_pc_relative_at(address branch); ++ ++ static bool is_call_at(address instr) { ++ if (is_jal_at(instr) || is_jalr_at(instr)) { ++ return true; ++ } ++ return false; ++ } ++ static bool is_lwu_to_zr(address instr); ++ ++ inline bool is_nop(); ++ inline bool is_jump_or_nop(); ++ bool is_safepoint_poll(); ++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + 
offset; } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ address ptr_at(int offset) const { return *(address*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ ++ ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } ++ void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } ++ void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } ++ ++ public: ++ ++ inline friend NativeInstruction* nativeInstruction_at(address addr); ++ ++ static bool maybe_cpool_ref(address instr) { ++ return is_auipc_at(instr); ++ } ++ ++ bool is_membar() { ++ return (uint_at(0) & 0x7f) == 0b1111 && extract_funct3(addr_at(0)) == 0; ++ } ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address addr) { ++ return (NativeInstruction*)addr; ++} ++ ++// The natural type of an RISCV instruction is uint32_t ++inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { ++ return (NativeInstruction*)addr; ++} ++ ++inline NativeCall* nativeCall_at(address addr); ++// The NativeCall is an abstraction for accessing/manipulating native ++// call instructions (used to manipulate inline caches, primitive & ++// DSO calls, etc.). ++ ++class NativeCall: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = 4, ++ instruction_offset = 0, ++ displacement_offset = 0, ++ return_address_offset = 4 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; ++ ++ void set_destination(address dest) { ++ if (is_jal()) { ++ intptr_t offset = (intptr_t)(dest - instruction_address()); ++ assert((offset & 0x1) == 0, "should be aligned"); ++ assert(is_imm_in_range(offset, 20, 1), "set_destination, offset is too large to be patched in one jal insrusction\n"); ++ unsigned int insn = 0b1101111; // jal ++ address pInsn = (address)(&insn); ++ Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1); ++ Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff); ++ Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1); ++ Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff); ++ Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra ++ set_int_at(displacement_offset, insn); ++ return; ++ } ++ ShouldNotReachHere(); ++ } ++ ++ void verify_alignment() { ; } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address addr); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate BL ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. 
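++ // (On RISC-V the immediate call is a jal rather than a BL, but the same
++ // reasoning about a single aligned 32-bit write applies.)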
++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) ++ ++ // The parameter assert_lock disables the assertion during code generation. ++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeCall* call = (NativeCall*)(addr - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ assert_cond(return_address != NULL); ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++// An interface for accessing/manipulating native mov reg, imm instructions. ++// (used to manipulate inlined 64-bit data calls, etc.) ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ movptr_instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr(). ++ movptr_with_offset_instruction_size = 5 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli. See movptr_with_offset(). ++ load_pc_relative_instruction_size = 2 * NativeInstruction::instruction_size, // auipc, ld ++ instruction_offset = 0, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ // if the instruction at 5 * instruction_size is addi, ++ // it means a lui + addi + slli + addi + slli + addi instruction sequence, ++ // and the next instruction address should be addr_at(6 * instruction_size). 
++ // However, when the instruction at 5 * instruction_size isn't addi, ++ // the next instruction address should be addr_at(5 * instruction_size) ++ if (nativeInstruction_at(instruction_address())->is_movptr()) { ++ if (is_addi_at(addr_at(movptr_with_offset_instruction_size))) { ++ // Assume: lui, addi, slli, addi, slli, addi ++ return addr_at(movptr_instruction_size); ++ } else { ++ // Assume: lui, addi, slli, addi, slli ++ return addr_at(movptr_with_offset_instruction_size); ++ } ++ } else if (is_load_pc_relative_at(instruction_address())) { ++ // Assume: auipc, ld ++ return addr_at(load_pc_relative_instruction_size); ++ } ++ guarantee(false, "Unknown instruction in NativeMovConstReg"); ++ return NULL; ++ } ++ ++ intptr_t data() const; ++ void set_data(intptr_t x); ++ ++ void flush() { ++ if (!maybe_cpool_ref(instruction_address())) { ++ ICache::invalidate_range(instruction_address(), movptr_instruction_size); ++ } ++ } ++ ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address addr); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address addr); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address addr) { ++ assert_cond(addr != NULL); ++ NativeMovConstReg* test = (NativeMovConstReg*)(addr - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++// RISCV should not use C1 runtime patching, so just leave NativeMovRegMem Unimplemented. 
++class NativeMovRegMem: public NativeInstruction { ++ public: ++ int instruction_start() const { ++ Unimplemented(); ++ return 0; ++ } ++ ++ address instruction_address() const { ++ Unimplemented(); ++ return NULL; ++ } ++ ++ int num_bytes_to_end_of_patch() const { ++ Unimplemented(); ++ return 0; ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { Unimplemented(); } ++ ++ void verify(); ++ void print(); ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address addr); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address addr) { ++ Unimplemented(); ++ return NULL; ++} ++ ++class NativeJump: public NativeInstruction { ++ public: ++ enum RISCV_specific_constants { ++ instruction_size = NativeInstruction::instruction_size, ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = NativeInstruction::instruction_size ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ address jump_destination() const; ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ void verify(); ++ ++ // Unit testing stuff ++ static void test() {} ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; ++ ++inline NativeJump* nativeJump_at(address addr) { ++ NativeJump* jump = (NativeJump*)(addr - NativeJump::instruction_offset); ++#ifdef ASSERT ++ jump->verify(); ++#endif ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++public: ++ enum RISCV_specific_constants { ++ instruction_size = 6 * NativeInstruction::instruction_size, // lui, addi, slli, addi, slli, jalr ++ instruction_offset = 0, ++ data_offset = 0, ++ next_instruction_offset = 6 * NativeInstruction::instruction_size // lui, addi, slli, addi, slli, jalr ++ }; ++ ++ address jump_destination() const; ++ ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address addr) { ++ assert_cond(addr != NULL); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(addr); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++ public: ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++inline bool NativeInstruction::is_nop() { ++ uint32_t insn = *(uint32_t*)addr_at(0); ++ return insn == 0x13; ++} ++ ++inline bool NativeInstruction::is_jump_or_nop() { ++ return is_nop() || is_jump(); ++} ++ ++// Call trampoline stubs. ++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum RISCV_specific_constants { ++ // Refer to function emit_trampoline_stub. 
++ instruction_size = 3 * NativeInstruction::instruction_size + wordSize, // auipc + ld + jr + target address ++ data_offset = 3 * NativeInstruction::instruction_size, // auipc + ld + jr ++ }; ++ ++ address destination(nmethod *nm = NULL) const; ++ void set_destination(address new_destination); ++ ptrdiff_t destination_offset() const; ++}; ++ ++inline bool is_NativeCallTrampolineStub_at(address addr) { ++ // Ensure that the stub is exactly ++ // ld t0, L--->auipc + ld ++ // jr t0 ++ // L: ++ ++ // judge inst + register + imm ++ // 1). check the instructions: auipc + ld + jalr ++ // 2). check if auipc[11:7] == t0 and ld[11:7] == t0 and ld[19:15] == t0 && jr[19:15] == t0 ++ // 3). check if the offset in ld[31:20] equals the data_offset ++ assert_cond(addr != NULL); ++ const int instr_size = NativeInstruction::instruction_size; ++ if (NativeInstruction::is_auipc_at(addr) && NativeInstruction::is_ld_at(addr + instr_size) && NativeInstruction::is_jalr_at(addr + 2 * instr_size) && ++ (NativeInstruction::extract_rd(addr) == x5) && ++ (NativeInstruction::extract_rd(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + instr_size) == x5) && ++ (NativeInstruction::extract_rs1(addr + 2 * instr_size) == x5) && ++ (Assembler::extract(((unsigned*)addr)[1], 31, 20) == NativeCallTrampolineStub::data_offset)) { ++ return true; ++ } ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); ++ return (NativeCallTrampolineStub*)addr; ++} ++ ++class NativeMembar : public NativeInstruction { ++public: ++ uint32_t get_kind(); ++ void set_kind(uint32_t order_kind); ++}; ++ ++inline NativeMembar *NativeMembar_at(address addr) { ++ assert_cond(addr != NULL); ++ assert(nativeInstruction_at(addr)->is_membar(), "no membar found"); ++ return (NativeMembar*)addr; ++} ++ ++#endif // CPU_RISCV_NATIVEINST_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/registerMap_riscv.hpp b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +new file mode 100644 +index 000000000..04a36c1c7 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/registerMap_riscv.hpp +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_RISCV_REGISTERMAP_RISCV_HPP
++#define CPU_RISCV_REGISTERMAP_RISCV_HPP
++
++// machine-dependent implementation for register maps
++  friend class frame;
++
++ private:
++  // This is the hook for finding a register in a "well-known" location,
++  // such as a register block of a predetermined format.
++  // Since there is none, we just return NULL.
++  // See registerMap_riscv.hpp for an example of grabbing registers
++  // from register save areas of a standard layout.
++  address pd_location(VMReg reg) const {return NULL;}
++
++  // no PD state to clear or copy:
++  void pd_clear() {}
++  void pd_initialize() {}
++  void pd_initialize_from(const RegisterMap* map) {}
++
++#endif // CPU_RISCV_REGISTERMAP_RISCV_HPP
+diff --git a/src/hotspot/cpu/riscv/register_definitions_riscv.cpp b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
+new file mode 100644
+index 000000000..b30c1b107
+--- /dev/null
++++ b/src/hotspot/cpu/riscv/register_definitions_riscv.cpp
+@@ -0,0 +1,193 @@
++/*
++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/register.hpp" ++#include "interp_masm_riscv.hpp" ++#include "register_riscv.hpp" ++ ++REGISTER_DEFINITION(Register, noreg); ++ ++REGISTER_DEFINITION(Register, x0); ++REGISTER_DEFINITION(Register, x1); ++REGISTER_DEFINITION(Register, x2); ++REGISTER_DEFINITION(Register, x3); ++REGISTER_DEFINITION(Register, x4); ++REGISTER_DEFINITION(Register, x5); ++REGISTER_DEFINITION(Register, x6); ++REGISTER_DEFINITION(Register, x7); ++REGISTER_DEFINITION(Register, x8); ++REGISTER_DEFINITION(Register, x9); ++REGISTER_DEFINITION(Register, x10); ++REGISTER_DEFINITION(Register, x11); ++REGISTER_DEFINITION(Register, x12); ++REGISTER_DEFINITION(Register, x13); ++REGISTER_DEFINITION(Register, x14); ++REGISTER_DEFINITION(Register, x15); ++REGISTER_DEFINITION(Register, x16); ++REGISTER_DEFINITION(Register, x17); ++REGISTER_DEFINITION(Register, x18); ++REGISTER_DEFINITION(Register, x19); ++REGISTER_DEFINITION(Register, x20); ++REGISTER_DEFINITION(Register, x21); ++REGISTER_DEFINITION(Register, x22); ++REGISTER_DEFINITION(Register, x23); ++REGISTER_DEFINITION(Register, x24); ++REGISTER_DEFINITION(Register, x25); ++REGISTER_DEFINITION(Register, x26); ++REGISTER_DEFINITION(Register, x27); ++REGISTER_DEFINITION(Register, x28); ++REGISTER_DEFINITION(Register, x29); ++REGISTER_DEFINITION(Register, x30); ++REGISTER_DEFINITION(Register, x31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++ ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); ++ ++REGISTER_DEFINITION(VectorRegister, vnoreg); ++ ++REGISTER_DEFINITION(VectorRegister, v0); ++REGISTER_DEFINITION(VectorRegister, v1); ++REGISTER_DEFINITION(VectorRegister, v2); ++REGISTER_DEFINITION(VectorRegister, v3); ++REGISTER_DEFINITION(VectorRegister, v4); ++REGISTER_DEFINITION(VectorRegister, v5); ++REGISTER_DEFINITION(VectorRegister, v6); ++REGISTER_DEFINITION(VectorRegister, v7); ++REGISTER_DEFINITION(VectorRegister, v8); ++REGISTER_DEFINITION(VectorRegister, v9); ++REGISTER_DEFINITION(VectorRegister, v10); ++REGISTER_DEFINITION(VectorRegister, v11); ++REGISTER_DEFINITION(VectorRegister, v12); ++REGISTER_DEFINITION(VectorRegister, v13); 
++REGISTER_DEFINITION(VectorRegister, v14); ++REGISTER_DEFINITION(VectorRegister, v15); ++REGISTER_DEFINITION(VectorRegister, v16); ++REGISTER_DEFINITION(VectorRegister, v17); ++REGISTER_DEFINITION(VectorRegister, v18); ++REGISTER_DEFINITION(VectorRegister, v19); ++REGISTER_DEFINITION(VectorRegister, v20); ++REGISTER_DEFINITION(VectorRegister, v21); ++REGISTER_DEFINITION(VectorRegister, v22); ++REGISTER_DEFINITION(VectorRegister, v23); ++REGISTER_DEFINITION(VectorRegister, v24); ++REGISTER_DEFINITION(VectorRegister, v25); ++REGISTER_DEFINITION(VectorRegister, v26); ++REGISTER_DEFINITION(VectorRegister, v27); ++REGISTER_DEFINITION(VectorRegister, v28); ++REGISTER_DEFINITION(VectorRegister, v29); ++REGISTER_DEFINITION(VectorRegister, v30); ++REGISTER_DEFINITION(VectorRegister, v31); ++ ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++REGISTER_DEFINITION(Register, c_rarg6); ++REGISTER_DEFINITION(Register, c_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++REGISTER_DEFINITION(FloatRegister, c_farg6); ++REGISTER_DEFINITION(FloatRegister, c_farg7); ++ ++REGISTER_DEFINITION(Register, j_rarg0); ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4); ++REGISTER_DEFINITION(Register, j_rarg5); ++REGISTER_DEFINITION(Register, j_rarg6); ++REGISTER_DEFINITION(Register, j_rarg7); ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0); ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5); ++REGISTER_DEFINITION(FloatRegister, j_farg6); ++REGISTER_DEFINITION(FloatRegister, j_farg7); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, gp); ++REGISTER_DEFINITION(Register, tp); ++REGISTER_DEFINITION(Register, xmethod); ++REGISTER_DEFINITION(Register, ra); ++REGISTER_DEFINITION(Register, sp); ++REGISTER_DEFINITION(Register, fp); ++REGISTER_DEFINITION(Register, xheapbase); ++REGISTER_DEFINITION(Register, xcpool); ++REGISTER_DEFINITION(Register, xmonitors); ++REGISTER_DEFINITION(Register, xlocals); ++REGISTER_DEFINITION(Register, xthread); ++REGISTER_DEFINITION(Register, xbcp); ++REGISTER_DEFINITION(Register, xdispatch); ++REGISTER_DEFINITION(Register, esp); ++ ++REGISTER_DEFINITION(Register, t0); ++REGISTER_DEFINITION(Register, t1); ++REGISTER_DEFINITION(Register, t2); +diff --git a/src/hotspot/cpu/riscv/register_riscv.cpp b/src/hotspot/cpu/riscv/register_riscv.cpp +new file mode 100644 +index 000000000..76215ef2a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.cpp +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_riscv.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; ++const int ConcreteRegisterImpl::max_fpr = ++ ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++const int ConcreteRegisterImpl::max_vpr = ++ ConcreteRegisterImpl::max_fpr + ++ VectorRegisterImpl::number_of_registers * VectorRegisterImpl::max_slots_per_register; ++ ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zr", "ra", "sp", "gp", "tp", "x5", "x6", "x7", "fp", "x9", ++ "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", ++ "x18", "x19", "esp", "xdispatch", "xbcp", "xthread", "xlocals", ++ "xmonitors", "xcpool", "xheapbase", "x28", "x29", "x30", "xmethod" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* VectorRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", ++ "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", ++ "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/riscv/register_riscv.hpp b/src/hotspot/cpu/riscv/register_riscv.hpp +new file mode 100644 +index 000000000..8beba6776 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/register_riscv.hpp +@@ -0,0 +1,337 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_REGISTER_RISCV_HPP ++#define CPU_RISCV_REGISTER_RISCV_HPP ++ ++#include "asm/register.hpp" ++ ++#define CSR_FFLAGS 0x001 // Floating-Point Accrued Exceptions. ++#define CSR_FRM 0x002 // Floating-Point Dynamic Rounding Mode. ++#define CSR_FCSR 0x003 // Floating-Point Control and Status Register (frm + fflags). ++#define CSR_VSTART 0x008 // Vector start position ++#define CSR_VXSAT 0x009 // Fixed-Point Saturate Flag ++#define CSR_VXRM 0x00A // Fixed-Point Rounding Mode ++#define CSR_VCSR 0x00F // Vector control and status register ++#define CSR_VL 0xC20 // Vector length ++#define CSR_VTYPE 0xC21 // Vector data type register ++#define CSR_VLENB 0xC22 // VLEN/8 (vector register length in bytes) ++#define CSR_CYCLE 0xc00 // Cycle counter for RDCYCLE instruction. ++#define CSR_TIME 0xc01 // Timer for RDTIME instruction. ++#define CSR_INSTERT 0xc02 // Instructions-retired counter for RDINSTRET instruction. ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ number_of_byte_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; } ++ const char* name() const; ++ int encoding_nocheck() const { return (intptr_t)this; } ++ ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ unsigned long bit(bool should_set = true) const { return should_set ? 
1 << encoding() : 0; } ++}; ++ ++// The integer registers of the riscv architecture ++ ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(Register, x0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, x1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, x2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, x3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, x4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, x5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, x6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, x7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, x8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, x9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, x10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, x11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, x12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, x13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, x14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, x15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, x16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, x17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, x18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, x19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, x20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, x21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, x22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, x23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, x24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, x25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, x26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, x27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, x28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, x29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, x30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, x31, (31)); ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 2 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++// The float registers of the RISCV architecture ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++// Use VectorRegister as shortcut ++class VectorRegisterImpl; ++typedef VectorRegisterImpl* VectorRegister; ++ ++inline VectorRegister as_VectorRegister(int encoding) { ++ return (VectorRegister)(intptr_t) encoding; ++} ++ ++// The implementation of vector registers for riscv-v ++class VectorRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ max_slots_per_register = 4 ++ }; ++ ++ // construction ++ inline friend VectorRegister as_VectorRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++// The vector registers of RVV ++CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v10 , (10)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v11 , (11)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v12 , (12)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v13 , (13)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v14 , (14)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v15 , (15)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v16 , (16)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v17 , (17)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v18 , (18)); 
++CONSTANT_REGISTER_DECLARATION(VectorRegister, v19 , (19)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v20 , (20)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v21 , (21)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v22 , (22)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v23 , (23)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v24 , (24)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v25 , (25)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v26 , (26)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v27 , (27)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v28 , (28)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v29 , (29)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v30 , (30)); ++CONSTANT_REGISTER_DECLARATION(VectorRegister, v31 , (31)); ++ ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. ++ ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + ++ VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers) ++ }; ++ ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; ++ static const int max_vpr; ++}; ++ ++// A set of registers ++class RegSet { ++ uint32_t _bitset; ++ ++public: ++ RegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++ RegSet() : _bitset(0) { } ++ ++ RegSet(Register r1) : _bitset(r1->bit()) { } ++ ++ ~RegSet() {} ++ ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } ++ ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= (31 - end->encoding()); ++ bits >>= (31 - end->encoding()); ++ ++ return RegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++#endif // CPU_RISCV_REGISTER_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.cpp b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +new file mode 100644 +index 000000000..f49fd6439 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.cpp +@@ -0,0 +1,113 @@ ++/* ++ * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ if (verify_only) { ++ return; ++ } ++ ++ int bytes; ++ ++ switch(type()) { ++ case relocInfo::oop_type: { ++ oop_Relocation *reloc = (oop_Relocation *)this; ++ // in movoop when immediate == false ++ if (NativeInstruction::is_load_pc_relative_at(addr())) { ++ address constptr = (address)code()->oop_addr_at(reloc->oop_index()); ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), constptr); ++ assert(*(address*)constptr == x, "error in oop relocation"); ++ } else { ++ bytes = MacroAssembler::patch_oop(addr(), x); ++ } ++ break; ++ } ++ default: ++ bytes = MacroAssembler::pd_patch_instruction_size(addr(), x); ++ break; ++ } ++ ICache::invalidate_range(addr(), bytes); ++} ++ ++address Relocation::pd_call_destination(address orig_addr) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } ++ } ++ if (orig_addr != NULL) { ++ // the extracted address from the instructions in address orig_addr ++ address new_addr = MacroAssembler::pd_call_destination(orig_addr); ++ // If call is branch to self, don't try to relocate it, just leave it ++ // as branch to self. This happens during code generation if the code ++ // buffer expands. It will be relocated to the trampoline above once ++ // code generation is complete. ++ new_addr = (new_addr == orig_addr) ? 
addr() : new_addr; ++ return new_addr; ++ } ++ return MacroAssembler::pd_call_destination(addr()); ++} ++ ++void Relocation::pd_set_call_destination(address x) { ++ assert(is_call(), "should be an address instruction here"); ++ if (NativeCall::is_call_at(addr())) { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline != NULL) { ++ nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); ++ return; ++ } ++ } ++ MacroAssembler::pd_patch_instruction_size(addr(), x); ++ address pd_call = pd_call_destination(addr()); ++ assert(pd_call == x, "fail in reloc"); ++} ++ ++address* Relocation::pd_address_in_code() { ++ assert(NativeCall::is_load_pc_relative_at(addr()), "Not the expected instruction sequence!"); ++ return (address*)(MacroAssembler::target_addr_for_insn(addr())); ++} ++ ++address Relocation::pd_get_address_from_code() { ++ return MacroAssembler::pd_call_destination(addr()); ++} ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ if (NativeInstruction::maybe_cpool_ref(addr())) { ++ address old_addr = old_addr_for(addr(), src, dest); ++ MacroAssembler::pd_patch_instruction_size(addr(), MacroAssembler::target_addr_for_insn(old_addr)); ++ } ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/riscv/relocInfo_riscv.hpp b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +new file mode 100644 +index 000000000..c30150e0a +--- /dev/null ++++ b/src/hotspot/cpu/riscv/relocInfo_riscv.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_RELOCINFO_RISCV_HPP ++#define CPU_RISCV_RELOCINFO_RISCV_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Relocations are byte-aligned. ++ offset_unit = 1, ++ // We don't use format(). ++ format_width = 0 ++ }; ++ ++ public: ++ ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. 
++  static bool mustIterateImmediateOopsInCode() { return false; }
++
++#endif // CPU_RISCV_RELOCINFO_RISCV_HPP
+diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
+new file mode 100644
+index 000000000..137e9b7c7
+--- /dev/null
++++ b/src/hotspot/cpu/riscv/riscv.ad
+@@ -0,0 +1,10685 @@
++//
++// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
++// Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved.
++// Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++//
++// This code is free software; you can redistribute it and/or modify it
++// under the terms of the GNU General Public License version 2 only, as
++// published by the Free Software Foundation.
++//
++// This code is distributed in the hope that it will be useful, but WITHOUT
++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++// version 2 for more details (a copy is included in the LICENSE file that
++// accompanied this code).
++//
++// You should have received a copy of the GNU General Public License version
++// 2 along with this work; if not, write to the Free Software Foundation,
++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++//
++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++// or visit www.oracle.com if you need additional information or have any
++// questions.
++//
++//
++
++// RISCV Architecture Description File
++
++//----------REGISTER DEFINITION BLOCK------------------------------------------
++// This information is used by the matcher and the register allocator to
++// describe individual registers and classes of registers within the target
++// architecture.
++
++register %{
++//----------Architecture Description Register Definitions----------------------
++// General Registers
++// "reg_def" name ( register save type, C convention save type,
++//                  ideal register type, encoding );
++// Register Save Types:
++//
++// NS  = No-Save:       The register allocator assumes that these registers
++//                      can be used without saving upon entry to the method, &
++//                      that they do not need to be saved at call sites.
++//
++// SOC = Save-On-Call:  The register allocator assumes that these registers
++//                      can be used without saving upon entry to the method,
++//                      but that they must be saved at call sites.
++//
++// SOE = Save-On-Entry: The register allocator assumes that these registers
++//                      must be saved before using them upon entry to the
++//                      method, but they do not need to be saved at call
++//                      sites.
++//
++// AS  = Always-Save:   The register allocator assumes that these registers
++//                      must be saved before using them upon entry to the
++//                      method, & that they must be saved at call sites.
++//
++// Ideal Register Type is used to determine how to save & restore a
++// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
++// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
++//
++// The encoding number is the actual bit-pattern placed into the opcodes.
++
++// We must define the 64 bit int registers in two 32 bit halves, the
++// real lower register and a virtual upper half register. Upper halves
++// are used by the register allocator but are not actually supplied as
++// operands to memory ops.
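As a quick illustration of the two-slot model described above (one real lower half plus a virtual upper half per 64-bit GPR, matching max_slots_per_register = 2 in register_riscv.hpp), here is a minimal standalone C++ sketch. It is not HotSpot code; the slot numbering and the helper name first_slot_for_gpr are assumptions used only to show how an Rn/Rn_H pair maps onto two 32-bit slots.

#include <cstdio>

static const int kSlotsPerGpr = 2;   // real lower half + virtual upper half (Rn / Rn_H)

// First 32-bit slot occupied by the GPR with the given encoding (illustrative numbering).
static int first_slot_for_gpr(int encoding) {
  return encoding * kSlotsPerGpr;
}

int main() {
  int enc = 10;   // x10, i.e. c_rarg0 in this port
  std::printf("x%d -> slot %d (R%d) and slot %d (R%d_H)\n",
              enc, first_slot_for_gpr(enc), enc,
              first_slot_for_gpr(enc) + 1, enc);
  return 0;
}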
++// ++// follow the C1 compiler in making registers ++// ++// x7, x9-x17, x28-x31 volatile (caller save) ++// x0-x4, x8, x27 system (no save, no allocate) ++// x5-x6 non-allocatable (so we can use them as temporary regs) ++ ++// ++// as regards Java usage. we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// ++ ++// General Registers ++ ++reg_def R0 ( NS, NS, Op_RegI, 0, x0->as_VMReg() ); // zr ++reg_def R0_H ( NS, NS, Op_RegI, 0, x0->as_VMReg()->next() ); ++reg_def R1 ( SOC, SOC, Op_RegI, 1, x1->as_VMReg() ); // ra ++reg_def R1_H ( SOC, SOC, Op_RegI, 1, x1->as_VMReg()->next() ); ++reg_def R2 ( NS, SOE, Op_RegI, 2, x2->as_VMReg() ); // sp ++reg_def R2_H ( NS, SOE, Op_RegI, 2, x2->as_VMReg()->next() ); ++reg_def R3 ( NS, NS, Op_RegI, 3, x3->as_VMReg() ); // gp ++reg_def R3_H ( NS, NS, Op_RegI, 3, x3->as_VMReg()->next() ); ++reg_def R4 ( NS, NS, Op_RegI, 4, x4->as_VMReg() ); // tp ++reg_def R4_H ( NS, NS, Op_RegI, 4, x4->as_VMReg()->next() ); ++reg_def R7 ( SOC, SOC, Op_RegI, 7, x7->as_VMReg() ); ++reg_def R7_H ( SOC, SOC, Op_RegI, 7, x7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOE, Op_RegI, 8, x8->as_VMReg() ); // fp ++reg_def R8_H ( NS, SOE, Op_RegI, 8, x8->as_VMReg()->next() ); ++reg_def R9 ( SOC, SOE, Op_RegI, 9, x9->as_VMReg() ); ++reg_def R9_H ( SOC, SOE, Op_RegI, 9, x9->as_VMReg()->next() ); ++reg_def R10 ( SOC, SOC, Op_RegI, 10, x10->as_VMReg() ); ++reg_def R10_H ( SOC, SOC, Op_RegI, 10, x10->as_VMReg()->next()); ++reg_def R11 ( SOC, SOC, Op_RegI, 11, x11->as_VMReg() ); ++reg_def R11_H ( SOC, SOC, Op_RegI, 11, x11->as_VMReg()->next()); ++reg_def R12 ( SOC, SOC, Op_RegI, 12, x12->as_VMReg() ); ++reg_def R12_H ( SOC, SOC, Op_RegI, 12, x12->as_VMReg()->next()); ++reg_def R13 ( SOC, SOC, Op_RegI, 13, x13->as_VMReg() ); ++reg_def R13_H ( SOC, SOC, Op_RegI, 13, x13->as_VMReg()->next()); ++reg_def R14 ( SOC, SOC, Op_RegI, 14, x14->as_VMReg() ); ++reg_def R14_H ( SOC, SOC, Op_RegI, 14, x14->as_VMReg()->next()); ++reg_def R15 ( SOC, SOC, Op_RegI, 15, x15->as_VMReg() ); ++reg_def R15_H ( SOC, SOC, Op_RegI, 15, x15->as_VMReg()->next()); ++reg_def R16 ( SOC, SOC, Op_RegI, 16, x16->as_VMReg() ); ++reg_def R16_H ( SOC, SOC, Op_RegI, 16, x16->as_VMReg()->next()); ++reg_def R17 ( SOC, SOC, Op_RegI, 17, x17->as_VMReg() ); ++reg_def R17_H ( SOC, SOC, Op_RegI, 17, x17->as_VMReg()->next()); ++reg_def R18 ( SOC, SOE, Op_RegI, 18, x18->as_VMReg() ); ++reg_def R18_H ( SOC, SOE, Op_RegI, 18, x18->as_VMReg()->next()); ++reg_def R19 ( SOC, SOE, Op_RegI, 19, x19->as_VMReg() ); ++reg_def R19_H ( SOC, SOE, Op_RegI, 19, x19->as_VMReg()->next()); ++reg_def R20 ( SOC, SOE, Op_RegI, 20, x20->as_VMReg() ); // caller esp ++reg_def R20_H ( SOC, SOE, Op_RegI, 20, x20->as_VMReg()->next()); ++reg_def R21 ( SOC, SOE, Op_RegI, 21, x21->as_VMReg() ); ++reg_def R21_H ( SOC, SOE, Op_RegI, 21, x21->as_VMReg()->next()); ++reg_def R22 ( SOC, SOE, Op_RegI, 22, x22->as_VMReg() ); ++reg_def R22_H ( SOC, SOE, Op_RegI, 22, x22->as_VMReg()->next()); ++reg_def R23 ( NS, SOE, Op_RegI, 23, x23->as_VMReg() ); // java thread ++reg_def R23_H ( NS, SOE, Op_RegI, 23, x23->as_VMReg()->next()); ++reg_def R24 ( SOC, SOE, Op_RegI, 24, x24->as_VMReg() ); ++reg_def R24_H ( SOC, SOE, Op_RegI, 24, x24->as_VMReg()->next()); ++reg_def R25 ( SOC, SOE, Op_RegI, 25, x25->as_VMReg() ); ++reg_def R25_H ( SOC, SOE, Op_RegI, 25, x25->as_VMReg()->next()); ++reg_def R26 ( SOC, SOE, Op_RegI, 26, x26->as_VMReg() ); ++reg_def 
R26_H ( SOC, SOE, Op_RegI, 26, x26->as_VMReg()->next()); ++reg_def R27 ( SOC, SOE, Op_RegI, 27, x27->as_VMReg() ); // heapbase ++reg_def R27_H ( SOC, SOE, Op_RegI, 27, x27->as_VMReg()->next()); ++reg_def R28 ( SOC, SOC, Op_RegI, 28, x28->as_VMReg() ); ++reg_def R28_H ( SOC, SOC, Op_RegI, 28, x28->as_VMReg()->next()); ++reg_def R29 ( SOC, SOC, Op_RegI, 29, x29->as_VMReg() ); ++reg_def R29_H ( SOC, SOC, Op_RegI, 29, x29->as_VMReg()->next()); ++reg_def R30 ( SOC, SOC, Op_RegI, 30, x30->as_VMReg() ); ++reg_def R30_H ( SOC, SOC, Op_RegI, 30, x30->as_VMReg()->next()); ++reg_def R31 ( SOC, SOC, Op_RegI, 31, x31->as_VMReg() ); ++reg_def R31_H ( SOC, SOC, Op_RegI, 31, x31->as_VMReg()->next()); ++ ++// ---------------------------- ++// Float/Double Registers ++// ---------------------------- ++ ++// Double Registers ++ ++// The rules of ADL require that double registers be defined in pairs. ++// Each pair must be two 32-bit values, but not necessarily a pair of ++// single float registers. In each pair, ADLC-assigned register numbers ++// must be adjacent, with the lower number even. Finally, when the ++// CPU stores such a register pair to memory, the word associated with ++// the lower ADLC-assigned number must be stored to the lower address. ++ ++// RISCV has 32 floating-point registers. Each can store a single ++// or double precision floating-point value. ++ ++// for Java use float registers f0-f31 are always save on call whereas ++// the platform ABI treats f8-f9 and f18-f27 as callee save). Other ++// float registers are SOC as per the platform spec ++ ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg() ); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next() ); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg() ); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next() ); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg() ); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next() ); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg() ); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next() ); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg() ); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next() ); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg() ); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next() ); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg() ); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next() ); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg() ); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next() ); ++reg_def F8 ( SOC, SOE, Op_RegF, 8, f8->as_VMReg() ); ++reg_def F8_H ( SOC, SOE, Op_RegF, 8, f8->as_VMReg()->next() ); ++reg_def F9 ( SOC, SOE, Op_RegF, 9, f9->as_VMReg() ); ++reg_def F9_H ( SOC, SOE, Op_RegF, 9, f9->as_VMReg()->next() ); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg() ); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next() ); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg() ); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next() ); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg() ); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next() ); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg() ); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next() ); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg() ); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next() ); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg() ); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next() ); ++reg_def F16 ( SOC, 
SOC, Op_RegF, 16, f16->as_VMReg() ); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next() ); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg() ); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next() ); ++reg_def F18 ( SOC, SOE, Op_RegF, 18, f18->as_VMReg() ); ++reg_def F18_H ( SOC, SOE, Op_RegF, 18, f18->as_VMReg()->next() ); ++reg_def F19 ( SOC, SOE, Op_RegF, 19, f19->as_VMReg() ); ++reg_def F19_H ( SOC, SOE, Op_RegF, 19, f19->as_VMReg()->next() ); ++reg_def F20 ( SOC, SOE, Op_RegF, 20, f20->as_VMReg() ); ++reg_def F20_H ( SOC, SOE, Op_RegF, 20, f20->as_VMReg()->next() ); ++reg_def F21 ( SOC, SOE, Op_RegF, 21, f21->as_VMReg() ); ++reg_def F21_H ( SOC, SOE, Op_RegF, 21, f21->as_VMReg()->next() ); ++reg_def F22 ( SOC, SOE, Op_RegF, 22, f22->as_VMReg() ); ++reg_def F22_H ( SOC, SOE, Op_RegF, 22, f22->as_VMReg()->next() ); ++reg_def F23 ( SOC, SOE, Op_RegF, 23, f23->as_VMReg() ); ++reg_def F23_H ( SOC, SOE, Op_RegF, 23, f23->as_VMReg()->next() ); ++reg_def F24 ( SOC, SOE, Op_RegF, 24, f24->as_VMReg() ); ++reg_def F24_H ( SOC, SOE, Op_RegF, 24, f24->as_VMReg()->next() ); ++reg_def F25 ( SOC, SOE, Op_RegF, 25, f25->as_VMReg() ); ++reg_def F25_H ( SOC, SOE, Op_RegF, 25, f25->as_VMReg()->next() ); ++reg_def F26 ( SOC, SOE, Op_RegF, 26, f26->as_VMReg() ); ++reg_def F26_H ( SOC, SOE, Op_RegF, 26, f26->as_VMReg()->next() ); ++reg_def F27 ( SOC, SOE, Op_RegF, 27, f27->as_VMReg() ); ++reg_def F27_H ( SOC, SOE, Op_RegF, 27, f27->as_VMReg()->next() ); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg() ); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next() ); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg() ); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next() ); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg() ); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next() ); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg() ); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next() ); ++ ++// ---------------------------- ++// Vector Registers ++// ---------------------------- ++ ++// For RVV vector registers, we simply extend vector register size to 4 ++// 'logical' slots. This is nominally 128 bits but it actually covers ++// all possible 'physical' RVV vector register lengths from 128 ~ 1024 ++// bits. The 'physical' RVV vector register length is detected during ++// startup, so the register allocator is able to identify the correct ++// number of bytes needed for an RVV spill/unspill. ++// for Java use vector registers v0-v31 are always save on call just ++// as the platform ABI treats v0-v31 as caller save. 
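To make the logical-versus-physical distinction above concrete, the following standalone C++ sketch (an illustration under stated assumptions, not HotSpot code) contrasts the fixed four 32-bit logical slots the allocator sees with the spill size that would follow from the vector length detected at startup; vlenb_bytes stands in for that detected VLEN/8 value.

#include <cassert>
#include <cstdio>

static const int kLogicalSlotsPerVector = 4;   // V*, V*_H, V*_J, V*_K (nominally 128 bits)

// 32-bit stack slots needed to spill one vector register of the detected length.
static int spill_slots_for_vector(int vlenb_bytes) {
  assert(vlenb_bytes >= 16 && vlenb_bytes <= 128);   // VLEN between 128 and 1024 bits
  return vlenb_bytes / 4;
}

int main() {
  std::printf("logical slots per vector register: %d\n", kLogicalSlotsPerVector);
  std::printf("spill slots when VLEN is 256 bits: %d\n", spill_slots_for_vector(32));
  return 0;
}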
++ ++reg_def V0 ( SOC, SOC, Op_VecA, 0, v0->as_VMReg() ); ++reg_def V0_H ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next() ); ++reg_def V0_J ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(2) ); ++reg_def V0_K ( SOC, SOC, Op_VecA, 0, v0->as_VMReg()->next(3) ); ++ ++reg_def V1 ( SOC, SOC, Op_VecA, 1, v1->as_VMReg() ); ++reg_def V1_H ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next() ); ++reg_def V1_J ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(2) ); ++reg_def V1_K ( SOC, SOC, Op_VecA, 1, v1->as_VMReg()->next(3) ); ++ ++reg_def V2 ( SOC, SOC, Op_VecA, 2, v2->as_VMReg() ); ++reg_def V2_H ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next() ); ++reg_def V2_J ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(2) ); ++reg_def V2_K ( SOC, SOC, Op_VecA, 2, v2->as_VMReg()->next(3) ); ++ ++reg_def V3 ( SOC, SOC, Op_VecA, 3, v3->as_VMReg() ); ++reg_def V3_H ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next() ); ++reg_def V3_J ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(2) ); ++reg_def V3_K ( SOC, SOC, Op_VecA, 3, v3->as_VMReg()->next(3) ); ++ ++reg_def V4 ( SOC, SOC, Op_VecA, 4, v4->as_VMReg() ); ++reg_def V4_H ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next() ); ++reg_def V4_J ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(2) ); ++reg_def V4_K ( SOC, SOC, Op_VecA, 4, v4->as_VMReg()->next(3) ); ++ ++reg_def V5 ( SOC, SOC, Op_VecA, 5, v5->as_VMReg() ); ++reg_def V5_H ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next() ); ++reg_def V5_J ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(2) ); ++reg_def V5_K ( SOC, SOC, Op_VecA, 5, v5->as_VMReg()->next(3) ); ++ ++reg_def V6 ( SOC, SOC, Op_VecA, 6, v6->as_VMReg() ); ++reg_def V6_H ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next() ); ++reg_def V6_J ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(2) ); ++reg_def V6_K ( SOC, SOC, Op_VecA, 6, v6->as_VMReg()->next(3) ); ++ ++reg_def V7 ( SOC, SOC, Op_VecA, 7, v7->as_VMReg() ); ++reg_def V7_H ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next() ); ++reg_def V7_J ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(2) ); ++reg_def V7_K ( SOC, SOC, Op_VecA, 7, v7->as_VMReg()->next(3) ); ++ ++reg_def V8 ( SOC, SOC, Op_VecA, 8, v8->as_VMReg() ); ++reg_def V8_H ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next() ); ++reg_def V8_J ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(2) ); ++reg_def V8_K ( SOC, SOC, Op_VecA, 8, v8->as_VMReg()->next(3) ); ++ ++reg_def V9 ( SOC, SOC, Op_VecA, 9, v9->as_VMReg() ); ++reg_def V9_H ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next() ); ++reg_def V9_J ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(2) ); ++reg_def V9_K ( SOC, SOC, Op_VecA, 9, v9->as_VMReg()->next(3) ); ++ ++reg_def V10 ( SOC, SOC, Op_VecA, 10, v10->as_VMReg() ); ++reg_def V10_H ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next() ); ++reg_def V10_J ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(2) ); ++reg_def V10_K ( SOC, SOC, Op_VecA, 10, v10->as_VMReg()->next(3) ); ++ ++reg_def V11 ( SOC, SOC, Op_VecA, 11, v11->as_VMReg() ); ++reg_def V11_H ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next() ); ++reg_def V11_J ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(2) ); ++reg_def V11_K ( SOC, SOC, Op_VecA, 11, v11->as_VMReg()->next(3) ); ++ ++reg_def V12 ( SOC, SOC, Op_VecA, 12, v12->as_VMReg() ); ++reg_def V12_H ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next() ); ++reg_def V12_J ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(2) ); ++reg_def V12_K ( SOC, SOC, Op_VecA, 12, v12->as_VMReg()->next(3) ); ++ ++reg_def V13 ( SOC, SOC, Op_VecA, 13, v13->as_VMReg() ); ++reg_def V13_H ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next() ); ++reg_def V13_J ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(2) ); 
++reg_def V13_K ( SOC, SOC, Op_VecA, 13, v13->as_VMReg()->next(3) ); ++ ++reg_def V14 ( SOC, SOC, Op_VecA, 14, v14->as_VMReg() ); ++reg_def V14_H ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next() ); ++reg_def V14_J ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(2) ); ++reg_def V14_K ( SOC, SOC, Op_VecA, 14, v14->as_VMReg()->next(3) ); ++ ++reg_def V15 ( SOC, SOC, Op_VecA, 15, v15->as_VMReg() ); ++reg_def V15_H ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next() ); ++reg_def V15_J ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(2) ); ++reg_def V15_K ( SOC, SOC, Op_VecA, 15, v15->as_VMReg()->next(3) ); ++ ++reg_def V16 ( SOC, SOC, Op_VecA, 16, v16->as_VMReg() ); ++reg_def V16_H ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next() ); ++reg_def V16_J ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(2) ); ++reg_def V16_K ( SOC, SOC, Op_VecA, 16, v16->as_VMReg()->next(3) ); ++ ++reg_def V17 ( SOC, SOC, Op_VecA, 17, v17->as_VMReg() ); ++reg_def V17_H ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next() ); ++reg_def V17_J ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(2) ); ++reg_def V17_K ( SOC, SOC, Op_VecA, 17, v17->as_VMReg()->next(3) ); ++ ++reg_def V18 ( SOC, SOC, Op_VecA, 18, v18->as_VMReg() ); ++reg_def V18_H ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next() ); ++reg_def V18_J ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(2) ); ++reg_def V18_K ( SOC, SOC, Op_VecA, 18, v18->as_VMReg()->next(3) ); ++ ++reg_def V19 ( SOC, SOC, Op_VecA, 19, v19->as_VMReg() ); ++reg_def V19_H ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next() ); ++reg_def V19_J ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(2) ); ++reg_def V19_K ( SOC, SOC, Op_VecA, 19, v19->as_VMReg()->next(3) ); ++ ++reg_def V20 ( SOC, SOC, Op_VecA, 20, v20->as_VMReg() ); ++reg_def V20_H ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next() ); ++reg_def V20_J ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(2) ); ++reg_def V20_K ( SOC, SOC, Op_VecA, 20, v20->as_VMReg()->next(3) ); ++ ++reg_def V21 ( SOC, SOC, Op_VecA, 21, v21->as_VMReg() ); ++reg_def V21_H ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next() ); ++reg_def V21_J ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(2) ); ++reg_def V21_K ( SOC, SOC, Op_VecA, 21, v21->as_VMReg()->next(3) ); ++ ++reg_def V22 ( SOC, SOC, Op_VecA, 22, v22->as_VMReg() ); ++reg_def V22_H ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next() ); ++reg_def V22_J ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(2) ); ++reg_def V22_K ( SOC, SOC, Op_VecA, 22, v22->as_VMReg()->next(3) ); ++ ++reg_def V23 ( SOC, SOC, Op_VecA, 23, v23->as_VMReg() ); ++reg_def V23_H ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next() ); ++reg_def V23_J ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(2) ); ++reg_def V23_K ( SOC, SOC, Op_VecA, 23, v23->as_VMReg()->next(3) ); ++ ++reg_def V24 ( SOC, SOC, Op_VecA, 24, v24->as_VMReg() ); ++reg_def V24_H ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next() ); ++reg_def V24_J ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(2) ); ++reg_def V24_K ( SOC, SOC, Op_VecA, 24, v24->as_VMReg()->next(3) ); ++ ++reg_def V25 ( SOC, SOC, Op_VecA, 25, v25->as_VMReg() ); ++reg_def V25_H ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next() ); ++reg_def V25_J ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(2) ); ++reg_def V25_K ( SOC, SOC, Op_VecA, 25, v25->as_VMReg()->next(3) ); ++ ++reg_def V26 ( SOC, SOC, Op_VecA, 26, v26->as_VMReg() ); ++reg_def V26_H ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next() ); ++reg_def V26_J ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(2) ); ++reg_def V26_K ( SOC, SOC, Op_VecA, 26, v26->as_VMReg()->next(3) ); ++ ++reg_def 
V27 ( SOC, SOC, Op_VecA, 27, v27->as_VMReg() ); ++reg_def V27_H ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next() ); ++reg_def V27_J ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(2) ); ++reg_def V27_K ( SOC, SOC, Op_VecA, 27, v27->as_VMReg()->next(3) ); ++ ++reg_def V28 ( SOC, SOC, Op_VecA, 28, v28->as_VMReg() ); ++reg_def V28_H ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next() ); ++reg_def V28_J ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(2) ); ++reg_def V28_K ( SOC, SOC, Op_VecA, 28, v28->as_VMReg()->next(3) ); ++ ++reg_def V29 ( SOC, SOC, Op_VecA, 29, v29->as_VMReg() ); ++reg_def V29_H ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next() ); ++reg_def V29_J ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(2) ); ++reg_def V29_K ( SOC, SOC, Op_VecA, 29, v29->as_VMReg()->next(3) ); ++ ++reg_def V30 ( SOC, SOC, Op_VecA, 30, v30->as_VMReg() ); ++reg_def V30_H ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next() ); ++reg_def V30_J ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(2) ); ++reg_def V30_K ( SOC, SOC, Op_VecA, 30, v30->as_VMReg()->next(3) ); ++ ++reg_def V31 ( SOC, SOC, Op_VecA, 31, v31->as_VMReg() ); ++reg_def V31_H ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next() ); ++reg_def V31_J ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(2) ); ++reg_def V31_K ( SOC, SOC, Op_VecA, 31, v31->as_VMReg()->next(3) ); ++ ++// ---------------------------- ++// Special Registers ++// ---------------------------- ++ ++// On riscv, the physical flag register is missing, so we use t1 instead, ++// to bridge the RegFlag semantics in share/opto ++ ++reg_def RFLAGS (SOC, SOC, Op_RegFlags, 6, x6->as_VMReg() ); ++ ++// Specify priority of register selection within phases of register ++// allocation. Highest priority is first. A useful heuristic is to ++// give registers a low priority when they are required by machine ++// instructions, like EAX and EDX on I486, and choose no-save registers ++// before save-on-call, & save-on-call before save-on-entry. Registers ++// which participate in fixed calling sequences should come last. ++// Registers which are used as pairs must fall on an even boundary. 
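For orientation when reading the allocation chunks that follow, the R numbers map onto the standard RV64 ABI names as follows (an editorial note based on the RISC-V calling convention, not text from the patch):

//   R0..R4     zero, ra, sp, gp, tp
//   R5..R7     t0..t2    (x5/x6, i.e. t0/t1, have no reg_def: they serve as
//                         assembler scratch, and x6 also backs RFLAGS above)
//   R8,  R9    s0/fp, s1
//   R10..R17   a0..a7    (integer argument registers)
//   R18..R27   s2..s11   (callee-saved; this port dedicates x23 to the Java
//                         thread and x27 to the heap base)
//   R28..R31   t3..t6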
++ ++alloc_class chunk0( ++ // volatiles ++ R7, R7_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H, ++ ++ // arg registers ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ ++ // non-volatiles ++ R9, R9_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ ++ // non-allocatable registers ++ R23, R23_H, // java thread ++ R27, R27_H, // heapbase ++ R4, R4_H, // thread ++ R8, R8_H, // fp ++ R0, R0_H, // zero ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++); ++ ++alloc_class chunk1( ++ ++ // no save ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H, ++ ++ // arg registers ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ ++ // non-volatiles ++ F8, F8_H, ++ F9, F9_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++); ++ ++alloc_class chunk2( ++ V0, V0_H, V0_J, V0_K, ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K, ++); ++ ++alloc_class chunk3(RFLAGS); ++ ++//----------Architecture Description Register Classes-------------------------- ++// Several register classes are automatically defined based upon information in ++// this architecture description. 
++// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) ++// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) ++// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) ++// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) ++// ++ ++// Class for all 32 bit general purpose registers ++reg_class all_reg32( ++ R0, ++ R1, ++ R2, ++ R3, ++ R4, ++ R7, ++ R8, ++ R9, ++ R10, ++ R11, ++ R12, ++ R13, ++ R14, ++ R15, ++ R16, ++ R17, ++ R18, ++ R19, ++ R20, ++ R21, ++ R22, ++ R23, ++ R24, ++ R25, ++ R26, ++ R27, ++ R28, ++ R29, ++ R30, ++ R31 ++); ++ ++// Class for any 32 bit integer registers (excluding zr) ++reg_class any_reg32 %{ ++ return _ANY_REG32_mask; ++%} ++ ++// Singleton class for R10 int register ++reg_class int_r10_reg(R10); ++ ++// Singleton class for R12 int register ++reg_class int_r12_reg(R12); ++ ++// Singleton class for R13 int register ++reg_class int_r13_reg(R13); ++ ++// Singleton class for R14 int register ++reg_class int_r14_reg(R14); ++ ++// Class for all long integer registers ++reg_class all_reg( ++ R0, R0_H, ++ R1, R1_H, ++ R2, R2_H, ++ R3, R3_H, ++ R4, R4_H, ++ R7, R7_H, ++ R8, R8_H, ++ R9, R9_H, ++ R10, R10_H, ++ R11, R11_H, ++ R12, R12_H, ++ R13, R13_H, ++ R14, R14_H, ++ R15, R15_H, ++ R16, R16_H, ++ R17, R17_H, ++ R18, R18_H, ++ R19, R19_H, ++ R20, R20_H, ++ R21, R21_H, ++ R22, R22_H, ++ R23, R23_H, ++ R24, R24_H, ++ R25, R25_H, ++ R26, R26_H, ++ R27, R27_H, ++ R28, R28_H, ++ R29, R29_H, ++ R30, R30_H, ++ R31, R31_H ++); ++ ++// Class for all long integer registers (excluding zr) ++reg_class any_reg %{ ++ return _ANY_REG_mask; ++%} ++ ++// Class for non-allocatable 32 bit registers ++reg_class non_allocatable_reg32( ++ R0, // zr ++ R1, // ra ++ R2, // sp ++ R3, // gp ++ R4, // tp ++ R23 // java thread ++); ++ ++// Class for non-allocatable 64 bit registers ++reg_class non_allocatable_reg( ++ R0, R0_H, // zr ++ R1, R1_H, // ra ++ R2, R2_H, // sp ++ R3, R3_H, // gp ++ R4, R4_H, // tp ++ R23, R23_H // java thread ++); ++ ++reg_class no_special_reg32 %{ ++ return _NO_SPECIAL_REG32_mask; ++%} ++ ++reg_class no_special_reg %{ ++ return _NO_SPECIAL_REG_mask; ++%} ++ ++reg_class ptr_reg %{ ++ return _PTR_REG_mask; ++%} ++ ++reg_class no_special_ptr_reg %{ ++ return _NO_SPECIAL_PTR_REG_mask; ++%} ++ ++// Class for 64 bit register r10 ++reg_class r10_reg( ++ R10, R10_H ++); ++ ++// Class for 64 bit register r11 ++reg_class r11_reg( ++ R11, R11_H ++); ++ ++// Class for 64 bit register r12 ++reg_class r12_reg( ++ R12, R12_H ++); ++ ++// Class for 64 bit register r13 ++reg_class r13_reg( ++ R13, R13_H ++); ++ ++// Class for 64 bit register r14 ++reg_class r14_reg( ++ R14, R14_H ++); ++ ++// Class for 64 bit register r15 ++reg_class r15_reg( ++ R15, R15_H ++); ++ ++// Class for 64 bit register r16 ++reg_class r16_reg( ++ R16, R16_H ++); ++ ++// Class for method register ++reg_class method_reg( ++ R31, R31_H ++); ++ ++// Class for heapbase register ++reg_class heapbase_reg( ++ R27, R27_H ++); ++ ++// Class for java thread register ++reg_class java_thread_reg( ++ R23, R23_H ++); ++ ++reg_class r28_reg( ++ R28, R28_H ++); ++ ++reg_class r29_reg( ++ R29, R29_H ++); ++ ++reg_class r30_reg( ++ R30, R30_H ++); ++ ++// Class for zero registesr ++reg_class zr_reg( ++ R0, R0_H ++); ++ ++// Class for thread register ++reg_class thread_reg( ++ R4, R4_H ++); ++ ++// Class for frame pointer register ++reg_class fp_reg( ++ R8, R8_H ++); ++ ++// Class for link register ++reg_class lr_reg( ++ R1, R1_H ++); ++ ++// 
Class for long sp register ++reg_class sp_reg( ++ R2, R2_H ++); ++ ++// Class for all float registers ++reg_class float_reg( ++ F0, ++ F1, ++ F2, ++ F3, ++ F4, ++ F5, ++ F6, ++ F7, ++ F8, ++ F9, ++ F10, ++ F11, ++ F12, ++ F13, ++ F14, ++ F15, ++ F16, ++ F17, ++ F18, ++ F19, ++ F20, ++ F21, ++ F22, ++ F23, ++ F24, ++ F25, ++ F26, ++ F27, ++ F28, ++ F29, ++ F30, ++ F31 ++); ++ ++// Double precision float registers have virtual `high halves' that ++// are needed by the allocator. ++// Class for all double registers ++reg_class double_reg( ++ F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F12, F12_H, ++ F13, F13_H, ++ F14, F14_H, ++ F15, F15_H, ++ F16, F16_H, ++ F17, F17_H, ++ F18, F18_H, ++ F19, F19_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H ++); ++ ++// Class for all RVV vector registers ++reg_class vectora_reg( ++ V1, V1_H, V1_J, V1_K, ++ V2, V2_H, V2_J, V2_K, ++ V3, V3_H, V3_J, V3_K, ++ V4, V4_H, V4_J, V4_K, ++ V5, V5_H, V5_J, V5_K, ++ V6, V6_H, V6_J, V6_K, ++ V7, V7_H, V7_J, V7_K, ++ V8, V8_H, V8_J, V8_K, ++ V9, V9_H, V9_J, V9_K, ++ V10, V10_H, V10_J, V10_K, ++ V11, V11_H, V11_J, V11_K, ++ V12, V12_H, V12_J, V12_K, ++ V13, V13_H, V13_J, V13_K, ++ V14, V14_H, V14_J, V14_K, ++ V15, V15_H, V15_J, V15_K, ++ V16, V16_H, V16_J, V16_K, ++ V17, V17_H, V17_J, V17_K, ++ V18, V18_H, V18_J, V18_K, ++ V19, V19_H, V19_J, V19_K, ++ V20, V20_H, V20_J, V20_K, ++ V21, V21_H, V21_J, V21_K, ++ V22, V22_H, V22_J, V22_K, ++ V23, V23_H, V23_J, V23_K, ++ V24, V24_H, V24_J, V24_K, ++ V25, V25_H, V25_J, V25_K, ++ V26, V26_H, V26_J, V26_K, ++ V27, V27_H, V27_J, V27_K, ++ V28, V28_H, V28_J, V28_K, ++ V29, V29_H, V29_J, V29_K, ++ V30, V30_H, V30_J, V30_K, ++ V31, V31_H, V31_J, V31_K ++); ++ ++// Class for 64 bit register f0 ++reg_class f0_reg( ++ F0, F0_H ++); ++ ++// Class for 64 bit register f1 ++reg_class f1_reg( ++ F1, F1_H ++); ++ ++// Class for 64 bit register f2 ++reg_class f2_reg( ++ F2, F2_H ++); ++ ++// Class for 64 bit register f3 ++reg_class f3_reg( ++ F3, F3_H ++); ++ ++// class for vector register v1 ++reg_class v1_reg( ++ V1, V1_H, V1_J, V1_K ++); ++ ++// class for vector register v2 ++reg_class v2_reg( ++ V2, V2_H, V2_J, V2_K ++); ++ ++// class for vector register v3 ++reg_class v3_reg( ++ V3, V3_H, V3_J, V3_K ++); ++ ++// class for vector register v4 ++reg_class v4_reg( ++ V4, V4_H, V4_J, V4_K ++); ++ ++// class for vector register v5 ++reg_class v5_reg( ++ V5, V5_H, V5_J, V5_K ++); ++ ++// class for condition codes ++reg_class reg_flags(RFLAGS); ++%} ++ ++//----------DEFINITION BLOCK--------------------------------------------------- ++// Define name --> value mappings to inform the ADLC of an integer valued name ++// Current support includes integer values in the range [0, 0x7FFFFFFF] ++// Format: ++// int_def ( , ); ++// Generated Code in ad_.hpp ++// #define () ++// // value == ++// Generated code in ad_.cpp adlc_verification() ++// assert( == , "Expect () to equal "); ++// ++ ++// we follow the ppc-aix port in using a simple cost model which ranks ++// register operations as cheap, memory ops as more expensive and ++// branches as most expensive. the first two have a low as well as a ++// normal cost. huge cost appears to be a way of saying don't do ++// something ++ ++definitions %{ ++ // The default cost (of a register move instruction). 
++ int_def DEFAULT_COST ( 100, 100); ++ int_def ALU_COST ( 100, 1 * DEFAULT_COST); // unknown, const, arith, shift, slt, ++ // multi, auipc, nop, logical, move ++ int_def LOAD_COST ( 300, 3 * DEFAULT_COST); // load, fpload ++ int_def STORE_COST ( 100, 1 * DEFAULT_COST); // store, fpstore ++ int_def XFER_COST ( 300, 3 * DEFAULT_COST); // mfc, mtc, fcvt, fmove, fcmp ++ int_def BRANCH_COST ( 100, 1 * DEFAULT_COST); // branch, jmp, call ++ int_def IMUL_COST ( 1000, 10 * DEFAULT_COST); // imul ++ int_def IDIVSI_COST ( 3400, 34 * DEFAULT_COST); // idivdi ++ int_def IDIVDI_COST ( 6600, 66 * DEFAULT_COST); // idivsi ++ int_def FMUL_SINGLE_COST ( 500, 5 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FMUL_DOUBLE_COST ( 700, 7 * DEFAULT_COST); // fadd, fmul, fmadd ++ int_def FDIV_COST ( 2000, 20 * DEFAULT_COST); // fdiv ++ int_def FSQRT_COST ( 2500, 25 * DEFAULT_COST); // fsqrt ++%} ++ ++ ++ ++//----------SOURCE BLOCK------------------------------------------------------- ++// This is a block of C++ code which provides values, functions, and ++// definitions necessary in the rest of the architecture description ++ ++source_hpp %{ ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "opto/addnode.hpp" ++#include "opto/convertnode.hpp" ++ ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++extern RegMask _NO_SPECIAL_REG32_mask; ++extern RegMask _NO_SPECIAL_REG_mask; ++extern RegMask _NO_SPECIAL_PTR_REG_mask; ++ ++class CallStubImpl { ++ ++ //-------------------------------------------------------------- ++ //---< Used for optimization in Compile::shorten_branches >--- ++ //-------------------------------------------------------------- ++ ++ public: ++ // Size of call trampoline stub. 
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ return MacroAssembler::far_branch_size(); ++ } ++ ++ static uint size_deopt_handler() { ++ // count auipc + far branch ++ return NativeInstruction::instruction_size + MacroAssembler::far_branch_size(); ++ } ++}; ++ ++// predicate controlling translation of StoreCM ++bool unnecessary_storestore(const Node *storecm); ++ ++bool is_CAS(int opcode, bool maybe_volatile); ++ ++// predicate controlling translation of CompareAndSwapX ++bool needs_acquiring_load_exclusive(const Node *load); ++ ++ ++// predicate using the temp register for decoding klass ++bool maybe_use_tmp_register_decoding_klass(); ++%} ++ ++source %{ ++ ++ // Derived RegMask with conditionally allocatable registers ++ ++ RegMask _ANY_REG32_mask; ++ RegMask _ANY_REG_mask; ++ RegMask _PTR_REG_mask; ++ RegMask _NO_SPECIAL_REG32_mask; ++ RegMask _NO_SPECIAL_REG_mask; ++ RegMask _NO_SPECIAL_PTR_REG_mask; ++ ++ void reg_mask_init() { ++ ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(x0->as_VMReg())); ++ ++ _ANY_REG_mask = _ALL_REG_mask; ++ _ANY_REG_mask.SUBTRACT(_ZR_REG_mask); ++ ++ _PTR_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask.SUBTRACT(_ZR_REG_mask); ++ ++ _NO_SPECIAL_REG32_mask = _ALL_REG32_mask; ++ _NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask); ++ ++ _NO_SPECIAL_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ ++ _NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask; ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask); ++ ++ // x27 is not allocatable when compressed oops is on ++ if (UseCompressedOops) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x27->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_HEAPBASE_REG_mask); ++ } ++ ++ // x8 is not allocatable when PreserveFramePointer is on ++ if (PreserveFramePointer) { ++ _NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(x8->as_VMReg())); ++ _NO_SPECIAL_REG_mask.SUBTRACT(_FP_REG_mask); ++ _NO_SPECIAL_PTR_REG_mask.SUBTRACT(_FP_REG_mask); ++ } ++ } ++ ++ ++// predicate controlling translation of StoreCM ++// ++// returns true if a StoreStore must precede the card write otherwise ++// false ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm != NULL && storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore) ++ // between an object put and the associated card mark when we are using ++ // CMS without conditional card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; ++} ++ ++// is_CAS(int opcode, bool maybe_volatile) ++// ++// return true if opcode is one of the possible CompareAndSwapX ++// values otherwise false. 
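One way to read the maybe_volatile parameter, judging from how needs_acquiring_load_exclusive uses it below (an interpretation, not wording from the patch):

// The strong CAS / GetAndSet / GetAndAdd nodes arrive here with a trailing
// membar (the code below asserts as much), so they count as CAS regardless of
// maybe_volatile. The CompareAndExchange* and WeakCompareAndSwap* forms are
// only counted when maybe_volatile is true, i.e. when the caller will go on to
// check whether a trailing membar is actually present.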
++bool is_CAS(int opcode, bool maybe_volatile) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_CompareAndSwapB: ++ case Op_CompareAndSwapS: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++#if INCLUDE_SHENANDOAHGC ++ case Op_ShenandoahCompareAndSwapP: ++ case Op_ShenandoahCompareAndSwapN: ++#endif ++ return true; ++ case Op_CompareAndExchangeI: ++ case Op_CompareAndExchangeN: ++ case Op_CompareAndExchangeB: ++ case Op_CompareAndExchangeS: ++ case Op_CompareAndExchangeL: ++ case Op_CompareAndExchangeP: ++ case Op_WeakCompareAndSwapB: ++ case Op_WeakCompareAndSwapS: ++ case Op_WeakCompareAndSwapI: ++ case Op_WeakCompareAndSwapL: ++ case Op_WeakCompareAndSwapP: ++ case Op_WeakCompareAndSwapN: ++ return maybe_volatile; ++ default: ++ return false; ++ } ++} ++ ++// predicate controlling translation of CAS ++// ++// returns true if CAS needs to use an acquiring load otherwise false ++bool needs_acquiring_load_exclusive(const Node *n) ++{ ++ assert(n != NULL && is_CAS(n->Opcode(), true), "expecting a compare and swap"); ++ if (UseBarriersForVolatile) { ++ return false; ++ } ++ ++ LoadStoreNode* ldst = n->as_LoadStore(); ++ if (n != NULL && is_CAS(n->Opcode(), false)) { ++ assert(ldst != NULL && ldst->trailing_membar() != NULL, "expected trailing membar"); ++ } else { ++ return ldst != NULL && ldst->trailing_membar() != NULL; ++ } ++ // so we can just return true here ++ return true; ++} ++ ++bool maybe_use_tmp_register_decoding_klass() { ++ return !UseCompressedOops && ++ Universe::narrow_klass_base() != NULL && ++ Universe::narrow_klass_shift() != 0; ++} ++#define __ _masm. ++ ++// advance declarations for helper functions to convert register ++// indices to register objects ++ ++// the ad file has to provide implementations of certain methods ++// expected by the generic code ++// ++// REQUIRED FUNCTIONALITY ++ ++//============================================================================= ++ ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++ ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++ // call should be a simple jal ++ int off = 4; ++ return off; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ return 28; // movptr, jal ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ // for generated stubs the call will be ++ // jal(addr) ++ // or with far branches ++ // jal(trampoline_stub) ++ // for real runtime callouts it will be five instructions ++ // see riscv_enc_java_to_runtime ++ // la(t1, retaddr) ++ // la(t0, RuntimeAddress(addr)) ++ // addi(sp, sp, -2 * wordSize) ++ // sd(zr, Address(sp)) ++ // sd(t1, Address(sp, wordSize)) ++ // jalr(t0) ++ CodeBlob *cb = CodeCache::find_blob(_entry_point); ++ if (cb != NULL) { ++ return 1 * NativeInstruction::instruction_size; ++ } else { ++ return 11 * NativeInstruction::instruction_size; ++ } ++} ++ ++// Indicate if the safepoint node needs the polling page as an input ++ ++// the shared code plants the oop data at the start of the generated ++// code for the safepoint node and that needs ot be at the load ++// instruction itself. so we cannot plant a mov of the safepoint poll ++// address followed by a load. setting this to true means the mov is ++// scheduled as a prior instruction. 
that's better for scheduling ++// anyway. ++ ++bool SafePointNode::needs_polling_address_input() ++{ ++ return true; ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL); ++ st->print("BREAKPOINT"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ ebreak(); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++ void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("nop \t# %d bytes pad for loops and calls", _count); ++ } ++#endif ++ ++ void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { ++ MacroAssembler _masm(&cbuf); ++ for (int i = 0; i < _count; i++) { ++ __ nop(); ++ } ++ } ++ ++ uint MachNopNode::size(PhaseRegAlloc*) const { ++ return _count * NativeInstruction::instruction_size; ++ } ++ ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ assert_cond(st != NULL); ++ st->print("-- \t// MachConstantBaseNode (empty encoding)"); ++} ++#endif ++ ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_slots() << LogBytesPerInt; ++ ++ if (C->need_stack_bang(framesize)) { ++ st->print("# stack bang size=%d\n\t", framesize); ++ } ++ ++ st->print("sub sp, sp, #%d\n\t", framesize); ++ st->print("sd fp, [sp, #%d]", - 2 * wordSize); ++ st->print("sd ra, [sp, #%d]", - wordSize); ++ if (PreserveFramePointer) { st->print("\n\tsub fp, sp, #%d", 2 * wordSize); } ++} ++#endif ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ // n.b. 
frame size includes space for return pc and fp ++ const int framesize = C->frame_size_in_bytes(); ++ assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); ++ ++ // insert a nop at the start of the prolog so we can patch in a ++ // branch if we need to invalidate the method later ++ __ nop(); ++ ++ assert_cond(C != NULL); ++ int bangsize = C->bang_size_in_bytes(); ++ if (C->need_stack_bang(bangsize) && UseStackBanging) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ build_frame(framesize); ++ ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++ } ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++uint MachPrologNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} ++ ++int MachPrologNode::reloc() const ++{ ++ return 0; ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(st != NULL && ra_ != NULL); ++ Compile* C = ra_->C; ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); ++ ++ st->print("# pop frame %d\n\t", framesize); ++ ++ if (framesize == 0) { ++ st->print("ld ra, [sp,#%d]\n\t", (2 * wordSize)); ++ st->print("ld fp, [sp,#%d]\n\t", (3 * wordSize)); ++ st->print("add sp, sp, #%d\n\t", (2 * wordSize)); ++ } else { ++ st->print("add sp, sp, #%d\n\t", framesize); ++ st->print("ld ra, [sp,#%d]\n\t", - 2 * wordSize); ++ st->print("ld fp, [sp,#%d]\n\t", - wordSize); ++ } ++ ++ if (do_polling() && C->is_method_compilation()) { ++ st->print("# touch polling page\n\t"); ++ st->print("li t0, #0x%lx\n\t", p2i(os::get_polling_page())); ++ st->print("ld zr, [t0]"); ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ assert_cond(C != NULL); ++ int framesize = C->frame_size_in_bytes(); ++ ++ __ remove_frame(framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (do_polling() && C->is_method_compilation()) { ++ __ read_polling_page(t0, os::get_polling_page(), relocInfo::poll_return_type); ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ assert_cond(ra_ != NULL); ++ // Variable size. Determine dynamically. ++ return MachNode::size(ra_); ++} ++ ++int MachEpilogNode::reloc() const { ++ // Return number of relocatable values contained in this instruction. ++ return 1; // 1 for polling page. ++} ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { ++ assert(do_polling(), "no return for this epilog node"); ++ return 4; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float or ++// rc_stack. 
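Spelled out with the counts the classification below uses (30 general registers since t0/t1 have no reg_def, 32 float and 32 vector registers, at 2/2/4 slots each), the ranges come out as:

//   slots   0 ..  59   30 GPRs * 2   -> rc_int
//   slots  60 .. 123   32 FPRs * 2   -> rc_float
//   slots 124 .. 251   32 VPRs * 4   -> rc_vector
//   RFLAGS, then the stack slots     -> rc_stack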
++enum RC { rc_bad, rc_int, rc_float, rc_vector, rc_stack }; ++ ++static enum RC rc_class(OptoReg::Name reg) { ++ ++ if (reg == OptoReg::Bad) { ++ return rc_bad; ++ } ++ ++ // we have 30 int registers * 2 halves ++ // (t0 and t1 are omitted) ++ int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); ++ if (reg < slots_of_int_registers) { ++ return rc_int; ++ } ++ ++ // we have 32 float register * 2 halves ++ int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers) { ++ return rc_float; ++ } ++ ++ // we have 32 vector register * 4 halves ++ int slots_of_vector_registers = VectorRegisterImpl::max_slots_per_register * VectorRegisterImpl::number_of_registers; ++ if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_vector_registers) { ++ return rc_vector; ++ } ++ ++ // Between vector regs & stack is the flags regs. ++ assert(OptoReg::is_stack(reg), "blow up if spilling flags"); ++ ++ return rc_stack; ++} ++ ++uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const { ++ assert_cond(ra_ != NULL); ++ Compile* C = ra_->C; ++ ++ // Get registers to move. ++ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_lo = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_hi = ra_->get_reg_second(this); ++ OptoReg::Name dst_lo = ra_->get_reg_first(this); ++ ++ enum RC src_hi_rc = rc_class(src_hi); ++ enum RC src_lo_rc = rc_class(src_lo); ++ enum RC dst_hi_rc = rc_class(dst_hi); ++ enum RC dst_lo_rc = rc_class(dst_lo); ++ ++ assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); ++ ++ if (src_hi != OptoReg::Bad) { ++ assert((src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi, ++ "expected aligned-adjacent pairs"); ++ } ++ ++ if (src_lo == dst_lo && src_hi == dst_hi) { ++ return 0; // Self copy, no move. 
++ } ++ ++ bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi && ++ (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi; ++ int src_offset = ra_->reg2offset(src_lo); ++ int dst_offset = ra_->reg2offset(dst_lo); ++ ++ if (bottom_type() == NULL) { ++ ShouldNotReachHere(); ++ } else if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ if (ireg == Op_VecA && cbuf) { ++ MacroAssembler _masm(cbuf); ++ int vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { ++ // stack to stack ++ __ spill_copy_vector_stack_to_stack(src_offset, dst_offset, ++ vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_stack) { ++ // vpr to stack ++ __ spill(as_VectorRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo)); ++ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_vector) { ++ // stack to vpr ++ __ unspill(as_VectorRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo)); ++ } else if (src_lo_rc == rc_vector && dst_lo_rc == rc_vector) { ++ // vpr to vpr ++ __ vmv1r_v(as_VectorRegister(Matcher::_regEncode[dst_lo]), as_VectorRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } ++ } else if (cbuf != NULL) { ++ MacroAssembler _masm(cbuf); ++ switch (src_lo_rc) { ++ case rc_int: ++ if (dst_lo_rc == rc_int) { // gpr --> gpr copy ++ if (!is64 && this->ideal_reg() != Op_RegI) { // zero extended for narrow oop or klass ++ __ zero_extend(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo]), 32); ++ } else { ++ __ mv(as_Register(Matcher::_regEncode[dst_lo]), as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy ++ if (is64) { ++ __ fmv_d_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_w_x(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_Register(Matcher::_regEncode[src_lo])); ++ } ++ } else { // gpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset); ++ } ++ break; ++ case rc_float: ++ if (dst_lo_rc == rc_int) { // fpr --> gpr copy ++ if (is64) { ++ __ fmv_x_d(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_x_w(as_Register(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy ++ if (is64) { ++ __ fmv_d(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ fmv_s(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } ++ } else { // fpr --> stack spill ++ assert(dst_lo_rc == rc_stack, "spill to bad register class"); ++ __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), ++ is64, dst_offset); ++ } ++ break; ++ case rc_stack: ++ if (dst_lo_rc == rc_int) { // stack --> gpr load ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } else { // // zero extended for narrow oop or klass ++ __ unspillu(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset); ++ } ++ } else if (dst_lo_rc == rc_float) { // stack --> fpr load ++ __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ is64, src_offset); ++ } else { // stack --> stack copy ++ assert(dst_lo_rc == rc_stack, "spill to 
bad register class"); ++ if (this->ideal_reg() == Op_RegI) { ++ __ unspill(t0, is64, src_offset); ++ } else { // zero extended for narrow oop or klass ++ __ unspillu(t0, is64, src_offset); ++ } ++ __ spill(t0, is64, dst_offset); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ if (st != NULL) { ++ st->print("spill "); ++ if (src_lo_rc == rc_stack) { ++ st->print("[sp, #%d] -> ", src_offset); ++ } else { ++ st->print("%s -> ", Matcher::regName[src_lo]); ++ } ++ if (dst_lo_rc == rc_stack) { ++ st->print("[sp, #%d]", dst_offset); ++ } else { ++ st->print("%s", Matcher::regName[dst_lo]); ++ } ++ if (bottom_type()->isa_vect() != NULL) { ++ int vsize = 0; ++ if (ideal_reg() == Op_VecA) { ++ vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; ++ } else { ++ ShouldNotReachHere(); ++ } ++ st->print("\t# vector spill size = %d", vsize); ++ } else { ++ st->print("\t# spill size = %d", is64 ? 64 : 32); ++ } ++ } ++ ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ if (ra_ == NULL) { ++ st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx); ++ } else { ++ implementation(NULL, ra_, false, st); ++ } ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const { ++ assert_cond(ra_ != NULL && st != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("add %s, sp, #%d\t# box lock", ++ Matcher::regName[reg], offset); ++} ++#endif ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ ++ assert_cond(ra_ != NULL); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ if (is_imm_in_range(offset, 12, 0)) { ++ __ addi(as_Register(reg), sp, offset); ++ } else if (is_imm_in_range(offset, 32, 0)) { ++ __ li32(t0, offset); ++ __ add(as_Register(reg), sp, t0); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_). ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ ++ if (is_imm_in_range(offset, 12, 0)) { ++ return NativeInstruction::instruction_size; ++ } else { ++ return 3 * NativeInstruction::instruction_size; // lui + addiw + add; ++ } ++} ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ assert_cond(st != NULL); ++ st->print_cr("# MachUEPNode"); ++ if (UseCompressedClassPointers) { ++ st->print_cr("\tlw t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ if (Universe::narrow_klass_shift() != 0) { ++ st->print_cr("\tdecode_klass_not_null t0, t0"); ++ } ++ } else { ++ st->print_cr("\tld t0, [j_rarg0, oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ } ++ st->print_cr("\tbne x10, t0, SharedRuntime::_ic_miss_stub\t # Inline cache check"); ++} ++#endif ++ ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ // This is the unverified entry point. 
++ MacroAssembler _masm(&cbuf); ++ ++ Label skip; ++ __ cmp_klass(j_rarg0, t1, t0, skip); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ bind(skip); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ assert_cond(ra_ != NULL); ++ return MachNode::size(ra_); ++} ++ ++// REQUIRED EMIT CODE ++ ++//============================================================================= ++ ++// Emit exception handler code. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) ++{ ++ // la_patchable t0, #exception_blob_entry_point ++ // jr (offset)t0 ++ // or ++ // j #exception_blob_entry_point ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) ++{ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ int offset = __ offset(); ++ ++ __ auipc(ra, 0); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++ ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++ ++} ++// REQUIRED MATCHER CODE ++ ++//============================================================================= ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) { ++ return false; ++ } ++ ++ switch (opcode) { ++ case Op_StrCompressedCopy: // fall through ++ case Op_StrInflatedCopy: // fall through ++ case Op_HasNegatives: ++ return UseRVV; ++ case Op_EncodeISOArray: ++ return UseRVV && SpecialEncodeISOArray; ++ case Op_PopCountI: ++ case Op_PopCountL: ++ return UsePopCountInstruction; ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ return UseZbb; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++// Identify extra cases that we might want to provide match rules for vector nodes and ++// other intrinsics guarded with vector length (vlen). ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } ++ ++ return op_vec_supported(opcode); ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; // not supported ++ ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ return default_pressure_threshold; ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ Unimplemented(); ++ return 0; ++} ++ ++// Is this branch offset short enough that a short branch can be used? 
++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. ++// |---label(L1)-----| ++// |-----------------| ++// |-----------------|----------eq: float------------------- ++// |-----------------| // far_cmpD_branch | cmpD_branch ++// |------- ---------| feq; | feq; ++// |-far_cmpD_branch-| beqz done; | bnez L; ++// |-----------------| j L; | ++// |-----------------| bind(done); | ++// |-----------------|-------------------------------------- ++// |-----------------| // so shortBrSize = br_size - 4; ++// |-----------------| // so offs = offset - shortBrSize + 4; ++// |---label(L2)-----| ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ int shortBrSize = br_size - 4; ++ int offs = offset - shortBrSize + 4; ++ return (-4096 <= offs && offs < 4096); ++} ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ // Probably always true, even if a temp register is required. ++ return true; ++} ++ ++// true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Vector width in bytes. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (UseRVV) { ++ // The MaxVectorSize should have been set by detecting RVV max vector register size when check UseRVV. ++ // MaxVectorSize == VM_Version::_initial_vector_length ++ return MaxVectorSize; ++ } ++ return 0; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ return vector_width_in_bytes(bt) / type2aelembytes(bt); ++} ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); ++} ++ ++// Vector ideal reg. ++const uint Matcher::vector_ideal_reg(int len) { ++ assert(MaxVectorSize >= len, ""); ++ if (UseRVV) { ++ return Op_VecA; ++ } ++ ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ switch(size) { ++ case 8: return Op_VecD; ++ case 16: return Op_VecX; ++ default: ++ if (size == vector_width_in_bytes(T_BYTE)) { ++ return Op_VecA; ++ } ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++const bool Matcher::supports_scalable_vector() { ++ return UseRVV; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return Matcher::max_vector_size(bt); ++} ++ ++// AES support not yet implemented ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++// riscv supports misaligned vectors store/load. ++const bool Matcher::misaligned_vectors_ok() { ++ return true; ++} ++ ++// false => size gets scaled to BytesPerLong, ok. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Use conditional move (CMOVL) ++const int Matcher::long_cmove_cost() { ++ // long cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++const int Matcher::float_cmove_cost() { ++ // float cmoves are no more expensive than int cmoves ++ return 0; ++} ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
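The answer is supplied by the hardware (an ISA note, not text from the patch), which is why the flag just below is false:

// RV64 sll/srl/sra read only rs2[5:0], and the 32-bit sllw/srlw/sraw forms read
// rs2[4:0], matching Java's masking of long/int shift counts to 6/5 bits, so no
// explicit masking of the count is needed.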
++const bool Matcher::need_masked_shift_count = false; ++ ++// This affects two different things: ++// - how Decode nodes are matched ++// - how ImplicitNullCheck opportunities are recognized ++// If true, the matcher will try to remove all Decodes and match them ++// (as operands) into nodes. NullChecks are not prepared to deal with ++// Decodes by final_graph_reshaping(). ++// If false, final_graph_reshaping() forces the decode behind the Cmp ++// for a NullCheck. The matcher matches the Decode node into a register. ++// Implicit_null_check optimization moves the Decode along with the ++// memory operation back up before the NullCheck. ++bool Matcher::narrow_oop_use_complex_address() { ++ return Universe::narrow_oop_shift() == 0; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++// TODO ++// decide whether we need to set this to true ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP in simple compressed oops mode. ++ return Universe::narrow_oop_base() == NULL; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ return Universe::narrow_klass_base() == NULL; ++} ++ ++// Is it better to copy float constants, or load them directly from ++// memory? Intel can load a float constant from a direct address, ++// requiring no extra registers. Most RISCs will have to materialize ++// an address into a register first, so they would do better to copy ++// the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++ ++// If CPU can load and store mis-aligned doubles directly then no ++// fixup is needed. Else we split the double into 2 integer pieces ++// and move it piece-by-piece. Only happens when passing doubles into ++// C code as the Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = true; ++ ++// No-op on amd64 ++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { ++ Unimplemented(); ++} ++ ++// Advertise here if the CPU requires explicit rounding operations to ++// implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++ ++// Are floats converted to double when stored to stack during ++// deoptimization? ++bool Matcher::float_in_double() { return false; } ++ ++// Do ints take an entire long register or just half? ++// The relevant question is how the int is callee-saved: ++// the whole long is written but de-opt'ing will have to extract ++// the relevant 32 bits. ++const bool Matcher::int_in_long = true; ++ ++// Return whether or not this register is ever used as an argument. ++// This function is used on startup to build the trampoline stubs in ++// generateOptoStub. Registers not mentioned will be killed by the VM ++// call in the trampoline, and arguments in those registers not be ++// available to the callee. 
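can_be_java_arg below simply enumerates the RV64 argument registers (the fa* names are the standard ABI names, noted here for reference):

//   R10..R17 = a0..a7    integer argument registers
//   F10..F17 = fa0..fa7  floating-point argument registers
// Everything else is treated as killed by the trampoline stubs, as the comment
// above explains.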
++bool Matcher::can_be_java_arg(int reg) ++{ ++ return ++ reg == R10_num || reg == R10_H_num || ++ reg == R11_num || reg == R11_H_num || ++ reg == R12_num || reg == R12_H_num || ++ reg == R13_num || reg == R13_H_num || ++ reg == R14_num || reg == R14_H_num || ++ reg == R15_num || reg == R15_H_num || ++ reg == R16_num || reg == R16_H_num || ++ reg == R17_num || reg == R17_H_num || ++ reg == F10_num || reg == F10_H_num || ++ reg == F11_num || reg == F11_H_num || ++ reg == F12_num || reg == F12_H_num || ++ reg == F13_num || reg == F13_H_num || ++ reg == F14_num || reg == F14_H_num || ++ reg == F15_num || reg == F15_H_num || ++ reg == F16_num || reg == F16_H_num || ++ reg == F17_num || reg == F17_H_num; ++} ++ ++bool Matcher::is_spillable_arg(int reg) ++{ ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ return false; ++} ++ ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI. ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL. ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODL projection of divmodL. ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++bool size_fits_all_mem_uses(AddPNode* addp, int shift) { ++ assert_cond(addp != NULL); ++ for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) { ++ Node* u = addp->fast_out(i); ++ if (u != NULL && u->is_Mem()) { ++ int opsize = u->as_Mem()->memory_size(); ++ assert(opsize > 0, "unexpected memory operand size"); ++ if (u->as_Mem()->memory_size() != (1 << shift)) { ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ ++const bool Matcher::convi2l_type_required = false; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ assert_cond(m != NULL); ++ if (clone_base_plus_offset_address(m, mstack, address_visited)) { ++ return true; ++ } ++ ++ Node *off = m->in(AddPNode::Offset); ++ if (off != NULL && off->Opcode() == Op_LShiftL && off->in(2)->is_Con() && ++ size_fits_all_mem_uses(m, off->in(2)->get_int()) && ++ // Are there other uses besides address expressions? ++ !is_visited(off)) { ++ address_visited.set(off->_idx); // Flag as address_visited ++ mstack.push(off->in(2), Visit); ++ Node *conv = off->in(1); ++ if (conv->Opcode() == Op_ConvI2L && ++ // Are there other uses besides address expressions? ++ !is_visited(conv)) { ++ address_visited.set(conv->_idx); // Flag as address_visited ++ mstack.push(conv->in(1), Pre_Visit); ++ } else { ++ mstack.push(conv, Pre_Visit); ++ } ++ address_visited.test_set(m->_idx); // Flag as address_visited ++ mstack.push(m->in(AddPNode::Address), Pre_Visit); ++ mstack.push(m->in(AddPNode::Base), Pre_Visit); ++ return true; ++ } else if (off != NULL && off->Opcode() == Op_ConvI2L && ++ // Are there other uses besides address expressions? 
++ !is_visited(off)) { ++ address_visited.test_set(m->_idx); // Flag as address_visited ++ address_visited.set(off->_idx); // Flag as address_visited ++ mstack.push(off->in(1), Pre_Visit); ++ mstack.push(m->in(AddPNode::Address), Pre_Visit); ++ mstack.push(m->in(AddPNode::Base), Pre_Visit); ++ return true; ++ } ++ return false; ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++%} ++ ++ ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes are parameterized macros ++// used by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. ++// ++// Instructions specify two basic values for encoding. Again, a ++// function is available to check if the constant displacement is an ++// oop. They use the ins_encode keyword to specify their encoding ++// classes (which must be a sequence of enc_class names, and their ++// parameters, specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular ++// instruction needs for encoding need to be specified. 
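To make the comment above concrete, this is the general shape in which an instruction later in this file consumes one of the encoding classes defined below (a schematic sketch; the instruction and pipe names are illustrative placeholders, not definitions from this patch):

// instruct example_branch(label lbl)
// %{
//   match(Goto);
//   effect(USE lbl);
//   ins_cost(BRANCH_COST);            // cost names come from the definitions block
//   format %{ "j  $lbl" %}
//   ins_encode(riscv_enc_j(lbl));     // enc_class from the encode block below
//   ins_pipe(pipe_branch);            // placeholder pipe class
// %}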
++encode %{ ++ // BEGIN Non-volatile memory access ++ ++ enc_class riscv_enc_li_imm(iRegIorL dst, immIorL src) %{ ++ MacroAssembler _masm(&cbuf); ++ int64_t con = (int64_t)$src$$constant; ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, con); ++ %} ++ ++ enc_class riscv_enc_mov_p(iRegP dst, immP src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL || con == (address)1) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ if (rtype == relocInfo::oop_type) { ++ __ movoop(dst_reg, (jobject)con, /*immediate*/true); ++ } else if (rtype == relocInfo::metadata_type) { ++ __ mov_metadata(dst_reg, (Metadata*)con); ++ } else { ++ assert(rtype == relocInfo::none, "unexpected reloc type"); ++ __ mv(dst_reg, $src$$constant); ++ } ++ } ++ %} ++ ++ enc_class riscv_enc_mov_p1(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, 1); ++ %} ++ ++ enc_class riscv_enc_mov_poll_page(iRegP dst, immPollPage src) %{ ++ MacroAssembler _masm(&cbuf); ++ int32_t offset = 0; ++ address page = (address)$src$$constant; ++ unsigned long align = (unsigned long)page & 0xfff; ++ assert(align == 0, "polling page must be page aligned"); ++ Register dst_reg = as_Register($dst$$reg); ++ __ la_patchable(dst_reg, Address(page, relocInfo::poll_type), offset); ++ __ addi(dst_reg, dst_reg, offset); ++ %} ++ ++ enc_class riscv_enc_mov_byte_map_base(iRegP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ __ load_byte_map_base($dst$$Register); ++ %} ++ ++ enc_class riscv_enc_mov_n(iRegN dst, immN src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::oop_type, "unexpected reloc type"); ++ __ set_narrow_oop(dst_reg, (jobject)con); ++ } ++ %} ++ ++ enc_class riscv_enc_mov_zero(iRegNorP dst) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ __ mv(dst_reg, zr); ++ %} ++ ++ enc_class riscv_enc_mov_nk(iRegN dst, immNKlass src) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ relocInfo::relocType rtype = $src->constant_reloc(); ++ assert(rtype == relocInfo::metadata_type, "unexpected reloc type"); ++ __ set_narrow_klass(dst_reg, (Klass *)con); ++ } ++ %} ++ ++ enc_class riscv_enc_cmpxchgw(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ enc_class riscv_enc_cmpxchgn(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ enc_class riscv_enc_cmpxchg(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ 
Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ enc_class riscv_enc_cmpxchgw_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ enc_class riscv_enc_cmpxchgn_acq(iRegINoSp res, memory mem, iRegINoSp oldval, iRegINoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ enc_class riscv_enc_cmpxchg_acq(iRegINoSp res, memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{ ++ MacroAssembler _masm(&cbuf); ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result as bool*/ true); ++ %} ++ ++ // compare and branch instruction encodings ++ ++ enc_class riscv_enc_j(label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ __ j(*L); ++ %} ++ ++ enc_class riscv_enc_far_cmpULtGe_imm0_branch(cmpOpULtGe cmp, iRegIorL op1, label lbl) %{ ++ MacroAssembler _masm(&cbuf); ++ Label* L = $lbl$$label; ++ switch($cmp$$cmpcode) { ++ case(BoolTest::ge): ++ __ j(*L); ++ break; ++ case(BoolTest::lt): ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ // call instruction encodings ++ ++ enc_class riscv_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result) %{ ++ Register sub_reg = as_Register($sub$$reg); ++ Register super_reg = as_Register($super$$reg); ++ Register temp_reg = as_Register($temp$$reg); ++ Register result_reg = as_Register($result$$reg); ++ Register cr_reg = t1; ++ ++ Label miss; ++ Label done; ++ MacroAssembler _masm(&cbuf); ++ __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg, ++ NULL, &miss); ++ if ($primary) { ++ __ mv(result_reg, zr); ++ } else { ++ __ mv(cr_reg, zr); ++ __ j(done); ++ } ++ ++ __ bind(miss); ++ if (!$primary) { ++ __ mv(cr_reg, 1); ++ } ++ ++ __ bind(done); ++ %} ++ ++ enc_class riscv_enc_java_static_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ ++ address addr = (address)$meth$$method; ++ address call = NULL; ++ assert_cond(addr != NULL); ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(Address(addr, rspec)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, call); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ ++ %} ++ ++ enc_class riscv_enc_java_dynamic_call(method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ int method_index = resolved_method_index(cbuf); ++ address call = __ ic_call((address)$meth$$method, method_index); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ enc_class riscv_enc_call_epilog() %{ ++ MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ // Check that stack depth is unchanged: find majik cookie on stack ++ __ call_Unimplemented(); ++ } ++ %} ++ ++ enc_class riscv_enc_java_to_runtime(method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ ++ // some calls to generated routines (arraycopy code) are scheduled ++ // by C2 as runtime calls. if so we can call them using a jr (they ++ // will be in a reachable segment) otherwise we have to use a jalr ++ // which loads the absolute address into a register. ++ address entry = (address)$meth$$method; ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb != NULL) { ++ address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } else { ++ Label retaddr; ++ __ la(t1, retaddr); ++ __ la(t0, RuntimeAddress(entry)); ++ // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc() ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(t1, Address(sp, wordSize)); ++ __ jalr(t0); ++ __ bind(retaddr); ++ __ addi(sp, sp, 2 * wordSize); ++ } ++ %} ++ ++ // using the cr register as the bool result: 0 for success; others failed. ++ enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; ++ ++ assert_different_registers(oop, box, tmp, disp_hdr, t0); ++ ++ // Load markOop from object into displaced_header. ++ __ ld(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); ++ ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ // ignore slow case here ++ __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont, /*slow_case*/NULL, NULL, flag); ++ } ++ ++ // Check for existing monitor ++ if ((EmitSync & 0x02) == 0) { ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } ++ ++ // Set tmp to be (markOop of object | UNLOCK_VALUE). ++ __ ori(tmp, disp_hdr, markOopDesc::unlocked_value); ++ ++ // Initialize the box. (Must happen before we update the object mark!) ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Compare object markOop with an unlocked value (tmp) and if ++ // equal exchange the stack address of our box with object markOop. ++ // On failure disp_hdr contains the possibly locked markOop. 
++ __ cmpxchg(/*memory address*/oop, /*expected value*/tmp, /*new value*/box, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/disp_hdr); ++ __ mv(flag, zr); ++ __ beq(disp_hdr, tmp, cont); // prepare zero flag and goto cont if we won the cas ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // If the compare-and-exchange succeeded, then we found an unlocked ++ // object, will have now locked it will continue at label cont ++ // We did not see an unlocked object so try the fast recursive case. ++ ++ // Check if the owner is self by comparing the value in the ++ // markOop of object (disp_hdr) with the stack pointer. ++ __ sub(disp_hdr, disp_hdr, sp); ++ __ mv(tmp, (intptr_t) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place)); ++ // If (mark & lock_mask) == 0 and mark - sp < page_size, we are stack-locking and goto cont, ++ // hence we can store 0 as the displaced header in the box, which indicates that it is a ++ // recursive lock. ++ __ andr(tmp/*==0?*/, disp_hdr, tmp); ++ __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ __ mv(flag, tmp); // we can use the value of tmp as the result here ++ ++ if ((EmitSync & 0x02) == 0) { ++ __ j(cont); ++ ++ // Handle existing monitor. ++ __ bind(object_has_monitor); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ // ++ // Try to CAS m->owner from NULL to current thread. ++ __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value)); ++ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, ++ Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) ++ ++ // Store a non-null value into the box to avoid looking like a re-entrant ++ // lock. The fast-path monitor unlock code checks for ++ // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the ++ // relevant bit set, and also matches ObjectSynchronizer::slow_enter. ++ __ mv(tmp, (address)markOopDesc::unused_mark()); ++ __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ } ++ ++ __ bind(cont); ++ %} ++ ++ // using cr flag to indicate the fast_unlock result: 0 for success; others failed. ++ enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register flag = t1; ++ Register oop = as_Register($object$$reg); ++ Register box = as_Register($box$$reg); ++ Register disp_hdr = as_Register($tmp$$reg); ++ Register tmp = as_Register($tmp2$$reg); ++ Label cont; ++ Label object_has_monitor; ++ ++ assert_different_registers(oop, box, tmp, disp_hdr, flag); ++ ++ // Always do locking in runtime. ++ if (EmitSync & 0x01) { ++ __ mv(flag, 1); ++ return; ++ } ++ ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ __ biased_locking_exit(oop, tmp, cont, flag); ++ } ++ ++ // Find the lock address and load the displaced header from the stack. ++ __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); ++ ++ // If the displaced header is 0, we have a recursive unlock. ++ __ mv(flag, disp_hdr); ++ __ beqz(disp_hdr, cont); ++ ++ // Handle existing monitor. 
++ if ((EmitSync & 0x02) == 0) { ++ __ ld(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); ++ __ andi(t0, disp_hdr, markOopDesc::monitor_value); ++ __ bnez(t0, object_has_monitor); ++ } ++ ++ // Check if it is still a light weight lock, this is true if we ++ // see the stack address of the basicLock in the markOop of the ++ // object. ++ ++ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, ++ Assembler::rl, /*result*/tmp); ++ __ xorr(flag, box, tmp); // box == tmp if cas succeeds ++ __ j(cont); ++ ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ ++ // Handle existing monitor. ++ if ((EmitSync & 0x02) == 0) { ++ __ bind(object_has_monitor); ++ __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor ++ __ ld(flag, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); ++ __ xorr(flag, flag, xthread); // Will be 0 if we are the owner. ++ __ orr(flag, flag, disp_hdr); // Will be 0 if there are 0 recursions ++ __ bnez(flag, cont); ++ ++ __ ld(flag, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); ++ __ ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); ++ __ orr(flag, flag, disp_hdr); // Will be 0 if both are 0. ++ __ bnez(flag, cont); ++ // need a release store here ++ __ la(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sd(zr, Address(tmp)); // set unowned ++ } ++ ++ __ bind(cont); ++ %} ++ ++ // arithmetic encodings ++ ++ enc_class riscv_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, false); ++ %} ++ ++ enc_class riscv_enc_div(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, false); ++ %} ++ ++ enc_class riscv_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivl(dst_reg, src1_reg, src2_reg, true); ++ %} ++ ++ enc_class riscv_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{ ++ MacroAssembler _masm(&cbuf); ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ __ corrected_idivq(dst_reg, src1_reg, src2_reg, true); ++ %} ++ ++ enc_class riscv_enc_tail_call(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Register target_reg = as_Register($jump_target$$reg); ++ __ jr(target_reg); ++ %} ++ ++ enc_class riscv_enc_tail_jmp(iRegP jump_target) %{ ++ MacroAssembler _masm(&cbuf); ++ Register target_reg = as_Register($jump_target$$reg); ++ // exception oop should be in x10 ++ // ret addr has been popped into ra ++ // callee expects it in x13 ++ __ mv(x13, ra); ++ __ jr(target_reg); ++ %} ++ ++ enc_class riscv_enc_rethrow() %{ ++ MacroAssembler _masm(&cbuf); ++ __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); ++ %} ++ ++ enc_class riscv_enc_ret() %{ ++ MacroAssembler _masm(&cbuf); ++ __ ret(); 
++ %} ++ ++%} ++ ++//----------FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add OptoReg::stack0()) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | | | 3 ++// | | +--------+ ++// V | | old out| Empty on Intel, window on Sparc ++// | old |preserve| Must be even aligned. ++// | SP-+--------+----> Matcher::_old_SP, even aligned ++// | | in | 3 area for Intel ret address ++// Owned by |preserve| Empty on Sparc. ++// SELF +--------+ ++// | | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> OptoReg::stack0(), even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by +--------+ ++// CALLEE | new out| 6 Empty on Intel, window on Sparc ++// | new |preserve| Must be even-aligned. ++// | SP-+--------+----> Matcher::_new_SP, even aligned ++// | | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// (the latter is true on Intel but is it false on RISCV?) ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++frame %{ ++ // What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); ++ ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. ++ ++ // Inline Cache Register or methodOop for I2C. ++ inline_cache_reg(R31); ++ ++ // Method Oop Register when calling interpreter. 
++ interpreter_method_oop_reg(R31); ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ // VMRegImpl::slots_per_word = wordSize / stack_slot_size = 8 / 4 = 2 ++ sync_stack_slots(1 * VMRegImpl::slots_per_word); ++ ++ // Compiled code's Frame Pointer ++ frame_pointer(R2); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(R8); ++ ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ // RISCV needs two words for RA (return address) and FP (frame pointer). ++ in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes / BytesPerInt); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ // TODO this may well be correct but need to check why that - 2 is there ++ // ppc port uses 0 but we definitely need to allow for fixed_slots ++ // which folds in the space used for monitors ++ return_addr(STACK - 2 + ++ align_up((Compile::current()->in_preserve_stack_slots() + ++ Compile::current()->fixed_slots()), ++ stack_alignment_in_slots())); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length); ++ %} ++ ++ // Location of compiled Java return values. Same as C for now. 
++ return_value ++ %{ ++ assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, ++ "only return normal values"); ++ ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ R10_num, // Op_RegN ++ R10_num, // Op_RegI ++ R10_num, // Op_RegP ++ F10_num, // Op_RegF ++ F10_num, // Op_RegD ++ R10_num // Op_RegL ++ }; ++ ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ R10_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F10_H_num, // Op_RegD ++ R10_H_num // Op_RegL ++ }; ++ ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(1); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_short_branch(0); // Required flag: is this instruction ++ // a non-matching short branch variant ++ // of some long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must ++ // be a power of 2) specifies the ++ // alignment that some part of the ++ // instruction (not necessarily the ++ // start) requires. If > 1, a ++ // compute_padding() function must be ++ // provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
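++// A note on the predicate() clauses used by many of the operands that follow:
++// they are evaluated against the ideal node n at match time, so they decide
++// whether a given constant (or register) may be matched by that operand at
++// all. For example, the immIAdd operand defined below is gated by
++//
++//   predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int()));
++//
++// and when the predicate fails the matcher simply falls back to a more
++// general rule (e.g. materialising the constant into a register first).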
++ ++//----------Simple Operands---------------------------------------------------- ++ ++// Integer operands 32 bit ++// 32 bit immediate ++operand immI() ++%{ ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit zero ++operand immI0() ++%{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit unit increment ++operand immI_1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit unit decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Unsigned Integer Immediate: 6-bit int, greater than 32 ++operand uimmI6_ge32() %{ ++ predicate(((unsigned int)(n->get_int()) < 64) && (n->get_int() >= 32)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_16() ++%{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_31() ++%{ ++ predicate(n->get_int() == 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32() ++%{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_63() ++%{ ++ predicate(n->get_int() == 63); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_64() ++%{ ++ predicate(n->get_int() == 64); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit integer valid for add immediate ++operand immIAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate((long)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 32 bit integer valid for sub immediate ++operand immISub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(long)n->get_int())); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 5 bit signed value. 
++operand immI5() ++%{ ++ predicate(n->get_int() <= 15 && n->get_int() >= -16); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 5 bit signed value (simm5) ++operand immL5() ++%{ ++ predicate(n->get_long() <= 15 && n->get_long() >= -16); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Integer operands 64 bit ++// 64 bit immediate ++operand immL() ++%{ ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 64 bit zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer operands ++// Pointer Immediate ++operand immP() ++%{ ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate One ++// this is used in object initialization (initial object header) ++operand immP_1() ++%{ ++ predicate(n->get_ptr() == 1); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Polling Page Pointer Immediate ++operand immPollPage() ++%{ ++ predicate((address)n->get_ptr() == os::get_polling_page()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Card Table Byte Map Base ++operand immByteMapBase() ++%{ ++ // Get base of card map ++ predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && ++ (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base()); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Int Immediate: low 16-bit mask ++operand immI_16bits() ++%{ ++ predicate(n->get_int() == 0xFFFF); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 64 bit unit decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++ ++// 32 bit offset of pc in thread anchor ++ ++operand immL_pc_off() ++%{ ++ predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) + ++ in_bytes(JavaFrameAnchor::last_Java_pc_offset())); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 64 bit integer valid for add immediate ++operand immLAdd() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(n->get_long())); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 64 bit integer valid for sub immediate ++operand immLSub() ++%{ ++ predicate(Assembler::operand_valid_for_add_immediate(-(n->get_long()))); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Narrow pointer operands ++// Narrow Pointer Immediate ++operand immN() ++%{ ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Narrow NULL Pointer Immediate ++operand immN0() ++%{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() ++%{ ++ match(ConNKlass); ++ ++ op_cost(0); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Float and Double operands ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double Immediate: +0.0d ++operand immD0() ++%{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Float Immediate: +0.0f. ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immIOffset() ++%{ ++ predicate(is_imm_in_range(n->get_int(), 12, 0)); ++ match(ConI); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immLOffset() ++%{ ++ predicate(is_imm_in_range(n->get_long(), 12, 0)); ++ match(ConL); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Scale values ++operand immIScale() ++%{ ++ predicate(1 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Integer 32 bit Register Operands ++operand iRegI() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 32 bit Register not Special ++operand iRegINoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Register R10 only ++operand iRegI_R10() ++%{ ++ constraint(ALLOC_IN_RC(int_r10_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Register R12 only ++operand iRegI_R12() ++%{ ++ constraint(ALLOC_IN_RC(int_r12_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Register R13 only ++operand iRegI_R13() ++%{ ++ constraint(ALLOC_IN_RC(int_r13_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Register R14 only ++operand iRegI_R14() ++%{ ++ constraint(ALLOC_IN_RC(int_r14_reg)); ++ match(RegI); ++ match(iRegINoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 64 bit Register Operands ++operand iRegL() ++%{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 64 bit Register not Special ++operand iRegLNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg)); ++ match(RegL); ++ match(iRegL_R10); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R28 only ++operand iRegL_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R29 only ++operand iRegL_R29() ++%{ ++ constraint(ALLOC_IN_RC(r29_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R30 only ++operand iRegL_R30() ++%{ ++ constraint(ALLOC_IN_RC(r30_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register Operands ++// Pointer Register ++operand iRegP() ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ match(iRegP_R10); ++ match(javaThread_RegP); ++ op_cost(0); 
++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register not Special ++operand iRegPNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_ptr_reg)); ++ match(RegP); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register R11 only ++operand iRegP_R11() ++%{ ++ constraint(ALLOC_IN_RC(r11_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R12() ++%{ ++ constraint(ALLOC_IN_RC(r12_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register R13 only ++operand iRegP_R13() ++%{ ++ constraint(ALLOC_IN_RC(r13_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R14() ++%{ ++ constraint(ALLOC_IN_RC(r14_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R15() ++%{ ++ constraint(ALLOC_IN_RC(r15_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand iRegP_R16() ++%{ ++ constraint(ALLOC_IN_RC(r16_reg)); ++ match(RegP); ++ // match(iRegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer 64 bit Register R28 only ++operand iRegP_R28() ++%{ ++ constraint(ALLOC_IN_RC(r28_reg)); ++ match(RegP); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register Operands ++// Narrow Pointer Register ++operand iRegN() ++%{ ++ constraint(ALLOC_IN_RC(any_reg32)); ++ match(RegN); ++ match(iRegNNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Integer 64 bit Register not Special ++operand iRegNNoSp() ++%{ ++ constraint(ALLOC_IN_RC(no_special_reg32)); ++ match(RegN); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// heap base register -- used for encoding immN0 ++operand iRegIHeapbase() ++%{ ++ constraint(ALLOC_IN_RC(heapbase_reg)); ++ match(RegI); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Long 64 bit Register R10 only ++operand iRegL_R10() ++%{ ++ constraint(ALLOC_IN_RC(r10_reg)); ++ match(RegL); ++ match(iRegLNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Float Register ++// Float register operands ++operand fRegF() ++%{ ++ constraint(ALLOC_IN_RC(float_reg)); ++ match(RegF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Double Register ++// Double register operands ++operand fRegD() ++%{ ++ constraint(ALLOC_IN_RC(double_reg)); ++ match(RegD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Generic vector class. This will be used for ++// all vector operands. 
++operand vReg() ++%{ ++ constraint(ALLOC_IN_RC(vectora_reg)); ++ match(VecA); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vReg_V1() ++%{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vReg_V2() ++%{ ++ constraint(ALLOC_IN_RC(v2_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vReg_V3() ++%{ ++ constraint(ALLOC_IN_RC(v3_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vReg_V4() ++%{ ++ constraint(ALLOC_IN_RC(v4_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand vReg_V5() ++%{ ++ constraint(ALLOC_IN_RC(v5_reg)); ++ match(VecA); ++ match(vReg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Java Thread Register ++operand javaThread_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(java_thread_reg)); // java_thread_reg ++ match(reg); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// RISCV has only base_plus_offset and literal address mode, so no need to use ++// index and scale. Here set index as 0xffffffff and scale as 0x0. ++operand indirect(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ op_cost(0); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffI(iRegP reg, immIOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffL(iRegP reg, immLOffset off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indirectN(iRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ op_cost(0); ++ format %{ "[$reg]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffIN(iRegN reg, immIOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffLN(iRegN reg, immLOffset off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ op_cost(0); ++ format %{ "[$reg, $off]\t# narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// RISCV opto stubs need to write to the pc slot in the thread anchor ++operand thread_anchor_pc(javaThread_RegP reg, immL_pc_off off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++ ++//----------Special Memory 
Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegI); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegF); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegD); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ // match(RegL); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x02); // RSP ++ index(0xffffffff); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++// Special operand allowing long args to int ops to be truncated for free ++ ++operand iRegL2I(iRegL reg) %{ ++ ++ op_cost(0); ++ ++ match(ConvL2I reg); ++ ++ format %{ "l2i($reg)" %} ++ ++ interface(REG_INTER) ++%} ++ ++ ++// Comparison Operands ++// NOTE: Label is a predefined operand which should not be redefined in ++// the AD file. It is generically handled within the ADLC. ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. 
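++// Tying this back to the encode block above: the numeric codes declared in
++// the COND_INTER interfaces below mirror BoolTest::mask, and they are what an
++// enc_class reads back through $cmp$$cmpcode. For example, the
++// riscv_enc_far_cmpULtGe_imm0_branch class earlier in this file switches on
++// the condition supplied by the cmpOpULtGe operand (abridged):
++//
++//   switch ($cmp$$cmpcode) {
++//     case BoolTest::ge: __ j(*L); break;  // unsigned x >= 0 is always true
++//     case BoolTest::lt: break;            // unsigned x <  0 is never true
++//     default:           Unimplemented();
++//   }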
++ ++ ++// used for signed integral comparisons and fp comparisons ++operand cmpOp() ++%{ ++ match(Bool); ++ ++ format %{ "" %} ++ ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} ++%} ++ ++// used for unsigned integral comparisons ++operand cmpOpU() ++%{ ++ match(Bool); ++ ++ format %{ "" %} ++ // the values in interface derives from struct BoolTest::mask ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gtu"); ++ overflow(0x2, "overflow"); ++ less(0x3, "ltu"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "leu"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "geu"); ++ %} ++%} ++ ++// used for certain integral comparisons which can be ++// converted to bxx instructions ++operand cmpOpEqNe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} ++%} ++ ++operand cmpOpULtGe() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::lt || ++ n->as_Bool()->_test._test == BoolTest::ge); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} ++%} ++ ++operand cmpOpUEqNeLeGt() ++%{ ++ match(Bool); ++ op_cost(0); ++ predicate(n->as_Bool()->_test._test == BoolTest::ne || ++ n->as_Bool()->_test._test == BoolTest::eq || ++ n->as_Bool()->_test._test == BoolTest::le || ++ n->as_Bool()->_test._test == BoolTest::gt); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x0, "eq"); ++ greater(0x1, "gt"); ++ overflow(0x2, "overflow"); ++ less(0x3, "lt"); ++ not_equal(0x4, "ne"); ++ less_equal(0x5, "le"); ++ no_overflow(0x6, "no_overflow"); ++ greater_equal(0x7, "ge"); ++ %} ++%} ++ ++ ++// Flags register, used as output of compare logic ++operand rFlagsReg() ++%{ ++ constraint(ALLOC_IN_RC(reg_flags)); ++ match(RegFlags); ++ ++ op_cost(0); ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} ++ ++// Special Registers ++ ++// Method Register ++operand inline_cache_RegP(iRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg ++ match(reg); ++ match(iRegPNoSp); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify ++// separate instructions for every form of operand when the ++// instruction accepts multiple operand types with the same basic ++// encoding and format. The classic case of this is memory operands. ++ ++// memory is used to define read/write location for load/store ++// instruction defs. 
we can turn a memory op into an Address ++ ++opclass memory(indirect, indOffI, indOffL, indirectN, indOffIN, indOffLN); ++ ++// iRegIorL2I is used for src inputs in rules for 32 bit int (I) ++// operations. it allows the src to be either an iRegI or a (ConvL2I ++// iRegL). in the latter case the l2i normally planted for a ConvL2I ++// can be elided because the 32-bit instruction will just employ the ++// lower 32 bits anyway. ++// ++// n.b. this does not elide all L2I conversions. if the truncated ++// value is consumed by more than one operation then the ConvL2I ++// cannot be bundled into the consuming nodes so an l2i gets planted ++// (actually a mvw $dst $src) and the downstream instructions consume ++// the result of the l2i as an iRegI input. That's a shame since the ++// mvw is actually redundant but its not too costly. ++ ++opclass iRegIorL2I(iRegI, iRegL2I); ++opclass iRegIorL(iRegI, iRegL); ++opclass iRegNorP(iRegN, iRegP); ++opclass iRegILNP(iRegI, iRegL, iRegN, iRegP); ++opclass iRegILNPNoSp(iRegINoSp, iRegLNoSp, iRegNNoSp, iRegPNoSp); ++opclass immIorL(immI, immL); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++ ++// For specific pipelines, e.g. generic RISC-V, define the stages of that pipeline ++//pipe_desc(ID, EX, MEM, WR); ++#define ID S0 ++#define EX S1 ++#define MEM S2 ++#define WR S3 ++ ++// Integer ALU reg operation ++pipeline %{ ++ ++attributes %{ ++ // RISC-V instructions are of fixed length ++ fixed_size_instructions; // Fixed size instructions TODO does ++ max_instructions_per_bundle = 2; // Generic RISC-V 1, Sifive Series 7 2 ++ // RISC-V instructions come in 32-bit word units ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 64; // The processor fetches one line ++ instruction_fetch_units = 1; // of 64 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++%} ++ ++// We don't use an actual pipeline model so don't care about resources ++// or description. we do use pipeline classes to introduce fixed ++// latencies ++ ++//----------RESOURCES---------------------------------------------------------- ++// Resources are the functional units available to the machine ++ ++// Generic RISC-V pipeline ++// 1 decoder ++// 1 instruction decoded per cycle ++// 1 load/store ops per cycle, 1 branch, 1 FPU ++// 1 mul, 1 div ++ ++resources ( DECODE, ++ ALU, ++ MUL, ++ DIV, ++ BRANCH, ++ LDST, ++ FPU); ++ ++//----------PIPELINE DESCRIPTION----------------------------------------------- ++// Pipeline Description specifies the stages in the machine's pipeline ++ ++// Define the pipeline as a generic 6 stage pipeline ++pipe_desc(S0, S1, S2, S3, S4, S5); ++ ++//----------PIPELINE CLASSES--------------------------------------------------- ++// Pipeline Classes describe the stages in which input and output are ++// referenced by the hardware pipeline. 
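++// A note on the stage names used below: ID, EX, MEM and WR are the #defines
++// above, i.e. stages S0..S3 of the generic pipe_desc(S0, S1, S2, S3, S4, S5).
++// Roughly speaking, the (read)/(write) annotations tell the ADLC when a class
++// of instruction consumes and produces its operands, which is how the fixed
++// latencies mentioned above are introduced. For the integer ALU classes this
++// looks like
++//
++//   src1 : ID(read);    // operand needed at decode (S0)
++//   dst  : EX(write);   // result available at execute (S1)
++//
++// while the FPU classes write their result at S5 to model a longer latency.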
++ ++pipe_class fp_dop_reg_reg_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_dop_reg_reg_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_uop_s(fRegF dst, fRegF src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_uop_d(fRegD dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_d2f(fRegF dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_f2d(fRegD dst, fRegF src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_f2i(iRegINoSp dst, fRegF src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_f2l(iRegLNoSp dst, fRegF src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_i2f(fRegF dst, iRegIorL2I src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_l2f(fRegF dst, iRegL src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_d2i(iRegINoSp dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_d2l(iRegLNoSp dst, fRegD src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_i2d(fRegD dst, iRegIorL2I src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_l2d(fRegD dst, iRegIorL2I src) ++%{ ++ single_instruction; ++ src : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_div_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_div_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_sqrt_s(fRegF dst, fRegF src1, fRegF src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_sqrt_d(fRegD dst, fRegD src1, fRegD src2) ++%{ ++ single_instruction; ++ src1 : S1(read); ++ src2 : S2(read); ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_load_constant_s(fRegF dst) ++%{ ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_load_constant_d(fRegD dst) ++%{ ++ single_instruction; ++ dst : S5(write); ++ DECODE : ID; ++ FPU : S5; ++%} ++ ++pipe_class fp_load_mem_s(fRegF dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++pipe_class fp_load_mem_d(fRegD dst, memory mem) ++%{ ++ single_instruction; ++ mem : S1(read); ++ dst : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++pipe_class fp_store_reg_s(fRegF src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : 
S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++pipe_class fp_store_reg_d(fRegD src, memory mem) ++%{ ++ single_instruction; ++ src : S1(read); ++ mem : S5(write); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++//------- Integer ALU operations -------------------------- ++ ++// Integer ALU reg-reg operation ++// Operands needs in ID, result generated in EX ++// E.g. ADD Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++// Integer ALU reg operation with constant shift ++// E.g. SLLI Rd, Rs1, #shift ++pipe_class ialu_reg_shift(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++// Integer ALU reg-reg operation with variable shift ++// both operands must be available in ID ++// E.g. SLL Rd, Rs1, Rs2 ++pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++// Integer ALU reg operation ++// E.g. NEG Rd, Rs2 ++pipe_class ialu_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++// Integer ALU reg immediate operation ++// E.g. ADDI Rd, Rs1, #imm ++pipe_class ialu_reg_imm(iRegI dst, iRegI src1) ++%{ ++ single_instruction; ++ dst : EX(write); ++ src1 : ID(read); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++// Integer ALU immediate operation (no source operands) ++// E.g. LI Rd, #imm ++pipe_class ialu_imm(iRegI dst) ++%{ ++ single_instruction; ++ dst : EX(write); ++ DECODE : ID; ++ ALU : EX; ++%} ++ ++//------- Multiply pipeline operations -------------------- ++ ++// Multiply reg-reg ++// E.g. MULW Rd, Rs1, Rs2 ++pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} ++ ++// E.g. MUL RD, Rs1, Rs2 ++pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(3); // Maximum latency for 64 bit mul ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ MUL : WR; ++%} ++ ++//------- Divide pipeline operations -------------------- ++ ++// E.g. DIVW Rd, Rs1, Rs2 ++pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(8); // Maximum latency for 32 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} ++ ++// E.g. DIV RD, Rs1, Rs2 ++pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ fixed_latency(16); // Maximum latency for 64 bit divide ++ dst : WR(write); ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ DIV : WR; ++%} ++ ++//------- Load pipeline operations ------------------------ ++ ++// Load - reg, mem ++// E.g. LA Rd, mem ++pipe_class iload_reg_mem(iRegI dst, memory mem) ++%{ ++ single_instruction; ++ dst : WR(write); ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++// Load - reg, reg ++// E.g. LD Rd, Rs ++pipe_class iload_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : WR(write); ++ src : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++//------- Store pipeline operations ----------------------- ++ ++// Store - zr, mem ++// E.g. 
SD zr, mem ++pipe_class istore_mem(memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++// Store - reg, mem ++// E.g. SD Rs, mem ++pipe_class istore_reg_mem(iRegI src, memory mem) ++%{ ++ single_instruction; ++ mem : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++// Store - reg, reg ++// E.g. SD Rs2, Rs1 ++pipe_class istore_reg_reg(iRegI dst, iRegI src) ++%{ ++ single_instruction; ++ dst : ID(read); ++ src : EX(read); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++//------- Store pipeline operations ----------------------- ++ ++// Branch ++pipe_class pipe_branch() ++%{ ++ single_instruction; ++ DECODE : ID; ++ BRANCH : EX; ++%} ++ ++// Branch ++pipe_class pipe_branch_reg(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} ++ ++// Compare & Branch ++// E.g. BEQ Rs1, Rs2, L ++pipe_class pipe_cmp_branch(iRegI src1, iRegI src2) ++%{ ++ single_instruction; ++ src1 : ID(read); ++ src2 : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} ++ ++// E.g. BEQZ Rs, L ++pipe_class pipe_cmpz_branch(iRegI src) ++%{ ++ single_instruction; ++ src : ID(read); ++ DECODE : ID; ++ BRANCH : EX; ++%} ++ ++//------- Synchronisation operations ---------------------- ++// Any operation requiring serialization ++// E.g. FENCE/Atomic Ops/Load Acquire/Store Release ++pipe_class pipe_serial() ++%{ ++ single_instruction; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++pipe_class pipe_slow() ++%{ ++ instruction_count(10); ++ multiple_bundles; ++ force_serialization; ++ fixed_latency(16); ++ DECODE : ID; ++ LDST : MEM; ++%} ++ ++// Empty pipeline class ++pipe_class pipe_class_empty() ++%{ ++ single_instruction; ++ fixed_latency(0); ++%} ++ ++// Default pipeline class. ++pipe_class pipe_class_default() ++%{ ++ single_instruction; ++ fixed_latency(2); ++%} ++ ++// Pipeline class for compares. ++pipe_class pipe_class_compare() ++%{ ++ single_instruction; ++ fixed_latency(16); ++%} ++ ++// Pipeline class for memory operations. ++pipe_class pipe_class_memory() ++%{ ++ single_instruction; ++ fixed_latency(16); ++%} ++ ++// Pipeline class for call. ++pipe_class pipe_class_call() ++%{ ++ single_instruction; ++ fixed_latency(100); ++%} ++ ++// Define the class for the Nop node. ++define %{ ++ MachNop = pipe_class_empty; ++%} ++%} ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. 
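++// For illustration, the load/store rules below unpack their "memory" opclass
++// operand inside ins_encode through the MEMORY_INTER accessors described in
++// the encoding-block comment further up, e.g.
++//
++//   __ lb(as_Register($dst$$reg),
++//         Address(as_Register($mem$$base), $mem$$disp));
++//
++// where $mem$$base and $mem$$disp come from the base()/disp() entries of
++// whichever memory operand (indirect, indOffI, indOffL, ...) was matched;
++// index and scale are unused here since RISC-V only has base+offset
++// addressing, as noted in the memory operand section above.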
++ ++// ============================================================================ ++// Memory (Load/Store) Instructions ++ ++// Load Instructions ++ ++// Load Byte (8 bit signed) ++instruct loadB(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB" %} ++ ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Byte (8 bit signed) into long ++instruct loadB2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lb $dst, $mem\t# byte, #@loadB2L" %} ++ ++ ins_encode %{ ++ __ lb(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Byte (8 bit unsigned) ++instruct loadUB(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB" %} ++ ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Byte (8 bit unsigned) into long ++instruct loadUB2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lbu $dst, $mem\t# byte, #@loadUB2L" %} ++ ++ ins_encode %{ ++ __ lbu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Short (16 bit signed) ++instruct loadS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Short (16 bit signed) into long ++instruct loadS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lh $dst, $mem\t# short, #@loadS2L" %} ++ ++ ins_encode %{ ++ __ lh(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Char (16 bit unsigned) ++instruct loadUS(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Short/Char (16 bit unsigned) into long ++instruct loadUS2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lhu $dst, $mem\t# short, #@loadUS2L" %} ++ ++ ins_encode %{ ++ __ lhu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) ++instruct loadI(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI" %} ++ ++ ins_encode %{ ++ __ lw(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) into long ++instruct loadI2L(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lw $dst, $mem\t# int, #@loadI2L" %} ++ ++ ins_encode %{ ++ __ lw(as_Register($dst$$reg), 
Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Integer (32 bit unsigned) into long ++instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# int, #@loadUI2L" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Long (64 bit signed) ++instruct loadL(iRegLNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# int, #@loadL" %} ++ ++ ins_encode %{ ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Range ++instruct loadRange(iRegINoSp dst, memory mem) ++%{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# range, #@loadRange" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Pointer ++instruct loadP(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# ptr, #@loadP" %} ++ ++ ins_encode %{ ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadN, compressed ptr, #@loadN" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(iRegPNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "ld $dst, $mem\t# class, #@loadKlass" %} ++ ++ ins_encode %{ ++ __ ld(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Narrow Klass Pointer ++instruct loadNKlass(iRegNNoSp dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "lwu $dst, $mem\t# loadNKlass, compressed class ptr, #@loadNKlass" %} ++ ++ ins_encode %{ ++ __ lwu(as_Register($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_mem); ++%} ++ ++// Load Float ++instruct loadF(fRegF dst, memory mem) ++%{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "flw $dst, $mem\t# float, #@loadF" %} ++ ++ ins_encode %{ ++ __ flw(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(fp_load_mem_s); ++%} ++ ++// Load Double ++instruct loadD(fRegD dst, memory mem) ++%{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(LOAD_COST); ++ format %{ "fld $dst, $mem\t# double, #@loadD" %} ++ ++ ins_encode %{ ++ __ fld(as_FloatRegister($dst$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(fp_load_mem_d); ++%} ++ ++// Load Int Constant ++instruct loadConI(iRegINoSp dst, immI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(ALU_COST); ++ format %{ "li $dst, $src\t# int, #@loadConI" %} ++ ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Long Constant ++instruct loadConL(iRegLNoSp dst, immL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(ALU_COST); ++ 
format %{ "li $dst, $src\t# long, #@loadConL" %} ++ ++ ins_encode(riscv_enc_li_imm(dst, src)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Pointer Constant ++instruct loadConP(iRegPNoSp dst, immP con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# ptr, #@loadConP" %} ++ ++ ins_encode(riscv_enc_mov_p(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Null Pointer Constant ++instruct loadConP0(iRegPNoSp dst, immP0 con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# NULL ptr, #@loadConP0" %} ++ ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Pointer Constant One ++instruct loadConP1(iRegPNoSp dst, immP_1 con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# load ptr constant one, #@loadConP1" %} ++ ++ ins_encode(riscv_enc_mov_p1(dst)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Poll Page Constant ++instruct loadConPollPage(iRegPNoSp dst, immPollPage con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "movptr $dst, $con\t# Poll Page Ptr, #@loadConPollPage" %} ++ ++ ins_encode(riscv_enc_mov_poll_page(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Byte Map Base Constant ++instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con) ++%{ ++ match(Set dst con); ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# Byte Map Base, #@loadByteMapBase" %} ++ ++ ins_encode(riscv_enc_mov_byte_map_base(dst)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Narrow Pointer Constant ++instruct loadConN(iRegNNoSp dst, immN con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 4); ++ format %{ "mv $dst, $con\t# compressed ptr, #@loadConN" %} ++ ++ ins_encode(riscv_enc_mov_n(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Narrow Null Pointer Constant ++instruct loadConN0(iRegNNoSp dst, immN0 con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $con\t# compressed NULL ptr, #@loadConN0" %} ++ ++ ins_encode(riscv_enc_mov_zero(dst)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Narrow Klass Constant ++instruct loadConNKlass(iRegNNoSp dst, immNKlass con) ++%{ ++ match(Set dst con); ++ ++ ins_cost(ALU_COST * 6); ++ format %{ "mv $dst, $con\t# compressed klass ptr, #@loadConNKlass" %} ++ ++ ins_encode(riscv_enc_mov_nk(dst, con)); ++ ++ ins_pipe(ialu_imm); ++%} ++ ++// Load Float Constant ++instruct loadConF(fRegF dst, immF con) %{ ++ match(Set dst con); ++ ++ ins_cost(LOAD_COST); ++ format %{ ++ "flw $dst, [$constantaddress]\t# load from constant table: float=$con, #@loadConF" ++ %} ++ ++ ins_encode %{ ++ __ flw(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} ++ ++ ins_pipe(fp_load_constant_s); ++%} ++ ++instruct loadConF0(fRegF dst, immF0 con) %{ ++ match(Set dst con); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, zr\t# float, #@loadConF0" %} ++ ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), zr); ++ %} ++ ++ ins_pipe(fp_load_constant_s); ++%} ++ ++// Load Double Constant ++instruct loadConD(fRegD dst, immD con) %{ ++ match(Set dst con); ++ ++ ins_cost(LOAD_COST); ++ format %{ ++ "fld $dst, [$constantaddress]\t# load from constant table: double=$con, #@loadConD" ++ %} ++ ++ ins_encode %{ ++ __ fld(as_FloatRegister($dst$$reg), $constantaddress($con)); ++ %} ++ ++ ins_pipe(fp_load_constant_d); ++%} ++ ++instruct loadConD0(fRegD dst, immD0 con) %{ ++ match(Set dst con); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.d.x $dst, zr\t# double, #@loadConD0" %} ++ ++ 
ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), zr); ++ %} ++ ++ ins_pipe(fp_load_constant_d); ++%} ++ ++// Store Instructions ++// Store CMS card-mark Immediate ++instruct storeimmCM0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); ++ ++ ins_cost(STORE_COST); ++ format %{ "storestore (elided)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0" %} ++ ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store CMS card-mark Immediate with intervening StoreStore ++// needed when using CMS with no conditional card marking ++instruct storeimmCM0_ordered(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(ALU_COST + STORE_COST); ++ format %{ "membar(StoreStore)\n\t" ++ "sb zr, $mem\t# byte, #@storeimmCM0_ordered" %} ++ ++ ins_encode %{ ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Byte ++instruct storeB(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sb $src, $mem\t# byte, #@storeB" %} ++ ++ ins_encode %{ ++ __ sb(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++instruct storeimmB0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreB mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sb zr, $mem\t# byte, #@storeimmB0" %} ++ ++ ins_encode %{ ++ __ sb(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Char/Short ++instruct storeC(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sh $src, $mem\t# short, #@storeC" %} ++ ++ ins_encode %{ ++ __ sh(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++instruct storeimmC0(immI0 zero, memory mem) ++%{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sh zr, $mem\t# short, #@storeimmC0" %} ++ ++ ins_encode %{ ++ __ sh(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Integer ++instruct storeI(iRegIorL2I src, memory mem) ++%{ ++ match(Set mem(StoreI mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# int, #@storeI" %} ++ ++ ins_encode %{ ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++instruct storeimmI0(immI0 zero, memory mem) ++%{ ++ match(Set mem(StoreI mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw zr, $mem\t# int, #@storeimmI0" %} ++ ++ ins_encode %{ ++ __ sw(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Long (64 bit signed) ++instruct storeL(iRegL src, memory mem) ++%{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# long, #@storeL" %} ++ ++ ins_encode %{ ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++// Store Long (64 bit signed) ++instruct storeimmL0(immL0 zero, memory mem) ++%{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# long, #@storeimmL0" %} ++ ++ ins_encode %{ ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ 
%} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Pointer ++instruct storeP(iRegP src, memory mem) ++%{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sd $src, $mem\t# ptr, #@storeP" %} ++ ++ ins_encode %{ ++ __ sd(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++// Store Pointer ++instruct storeimmP0(immP0 zero, memory mem) ++%{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sd zr, $mem\t# ptr, #@storeimmP0" %} ++ ++ ins_encode %{ ++ __ sd(zr, Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_mem); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed ptr, #@storeN" %} ++ ++ ins_encode %{ ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem) ++%{ ++ match(Set mem (StoreN mem zero)); ++ predicate(Universe::narrow_oop_base() == NULL && ++ Universe::narrow_klass_base() == NULL); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw rheapbase, $mem\t# compressed ptr (rheapbase==0), #@storeImmN0" %} ++ ++ ins_encode %{ ++ __ sw(as_Register($heapbase$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++// Store Float ++instruct storeF(fRegF src, memory mem) ++%{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "fsw $src, $mem\t# float, #@storeF" %} ++ ++ ins_encode %{ ++ __ fsw(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(fp_store_reg_s); ++%} ++ ++// Store Double ++instruct storeD(fRegD src, memory mem) ++%{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "fsd $src, $mem\t# double, #@storeD" %} ++ ++ ins_encode %{ ++ __ fsd(as_FloatRegister($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(fp_store_reg_d); ++%} ++ ++// Store Compressed Klass Pointer ++instruct storeNKlass(iRegN src, memory mem) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(STORE_COST); ++ format %{ "sw $src, $mem\t# compressed klass ptr, #@storeNKlass" %} ++ ++ ins_encode %{ ++ __ sw(as_Register($src$$reg), Address(as_Register($mem$$base), $mem$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_mem); ++%} ++ ++// ============================================================================ ++// Atomic operation instructions ++// ++// Intel and SPARC both implement Ideal Node LoadPLocked and ++// Store{PIL}Conditional instructions using a normal load for the ++// LoadPLocked and a CAS for the Store{PIL}Conditional. ++// ++// The ideal code appears only to use LoadPLocked/storePConditional as a ++// pair to lock object allocations from Eden space when not using ++// TLABs. ++// ++// There does not appear to be a Load{IL}Locked Ideal Node and the ++// Ideal code appears to use Store{IL}Conditional as an alias for CAS ++// and to use StoreIConditional only for 32-bit and StoreLConditional ++// only for 64-bit. ++// ++// We implement LoadPLocked and storePConditional instructions using, ++// respectively the RISCV hw load-reserve and store-conditional ++// instructions. 
Whereas we must implement each of ++// Store{IL}Conditional using a CAS which employs a pair of ++// instructions comprising a load-reserve followed by a ++// store-conditional. ++ ++ ++// Locked-load (load reserved) of the current heap-top ++// used when updating the eden heap top ++// implemented using lr_d on RISCV64 ++instruct loadPLocked(iRegPNoSp dst, indirect mem) ++%{ ++ match(Set dst (LoadPLocked mem)); ++ ++ ins_cost(ALU_COST * 2 + LOAD_COST); ++ ++ format %{ "lr.d $dst, $mem\t# ptr load reserved, #@loadPLocked" %} ++ ++ ins_encode %{ ++ __ la(t0, Address(as_Register($mem$$base), $mem$$disp)); ++ __ lr_d($dst$$Register, t0, Assembler::aq); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// implemented using sc_d on RISCV. ++instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr) ++%{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ ins_cost(ALU_COST * 2 + STORE_COST); ++ ++ format %{ ++ "sc_d t1, $newval $heap_top_ptr,\t# ptr store conditional, #@storePConditional" ++ %} ++ ++ ins_encode %{ ++ __ la(t0, Address(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp)); ++ __ sc_d($cr$$Register, $newval$$Register, t0, Assembler::rl); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// storeLConditional is used by PhaseMacroExpand::expand_lock_node ++// when attempting to rebias a lock towards the current thread. ++instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + 2 * BRANCH_COST); ++ ++ format %{ ++ "cmpxchg t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeLConditional" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++// storeIConditional also has acquire semantics, for no better reason ++// than matching storeLConditional. 
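Both conditional-store rules (storeLConditional above and storeIConditional next) lean on the same load-reserved/store-conditional retry loop inside MacroAssembler::cmpxchg, while storePConditional uses only the sc.d half because LoadPLocked has already issued the matching lr.d. A standalone C++ sketch of the general lr/sc shape that such a 64-bit compare-and-exchange reduces to, written with GCC-style inline assembly rather than the HotSpot assembler (the wrapper and its name are illustrative only, not the code in this patch):

// Illustration only. Returns the value observed in memory; the exchange
// succeeded iff the returned value equals `expected`.
static inline long cas64_sketch(volatile long* addr, long expected, long desired) {
  long observed;
  long fail;
  __asm__ volatile(
      "1: lr.d.aq  %0, (%2)      \n"   // load-reserved, acquire ordering
      "   bne      %0, %3, 2f    \n"   // current value differs: give up
      "   sc.d.rl  %1, %4, (%2)  \n"   // store-conditional, release ordering
      "   bnez     %1, 1b        \n"   // reservation lost: retry
      "2:"
      : "=&r"(observed), "=&r"(fail)
      : "r"(addr), "r"(expected), "r"(desired)
      : "memory");
  return observed;
}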
++instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2); ++ ++ format %{ ++ "cmpxchgw t1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval" ++ "xorr $cr, $cr, $oldval\t# $cr == 0 on successful write, #@storeIConditional" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $cr$$Register); ++ __ xorr($cr$$Register,$cr$$Register, $oldval$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++// standard CompareAndSwapX when we are using barriers ++// these have higher priority than the rules selected by a predicate ++instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapB" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapS" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::relaxed /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapI" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchgw(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapL" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapP" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchg(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapN" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchgn(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++// alternative CompareAndSwapX when we are eliding barriers ++instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 10 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (byte) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapBAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 11 + BRANCH_COST * 4); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (short) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 
1 : 0), #@compareAndSwapSAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ Assembler::aq /* acquire */, Assembler::rl /* release */, $res$$Register, ++ true /* result as bool */, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapIAcq" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchgw_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapLAcq" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 6 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapPAcq" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchg_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); ++ ++ format %{ ++ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval\n\t" ++ "mv $res, $res == $oldval\t# $res <-- ($res == $oldval ? 1 : 0), #@compareAndSwapNAcq" ++ %} ++ ++ ins_encode(riscv_enc_cmpxchgn_acq(res, mem, oldval, newval)); ++ ++ ins_pipe(pipe_slow); ++%} ++ ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// sc_d(w) with rl bit set. 
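The note above shows up directly in the arguments the following rules pass to the assembler: the release parameter of cmpxchg / cmpxchg_narrow_value is always Assembler::rl, and only the acquire parameter switches between Assembler::relaxed and Assembler::aq for the plain versus the *Acq variants. A small C++ sketch of that pairing, reusing the cmpxchg signature seen throughout this file (the helper function itself is illustrative, not part of the patch):

// Illustrative helper showing the acquire/release choices the CAS rules make.
static void emit_cas_sketch(MacroAssembler& _masm, Register addr, Register oldv,
                            Register newv, Register res, bool acquiring) {
  _masm.cmpxchg(addr, oldv, newv, Assembler::int64,
                /*acquire*/ acquiring ? Assembler::aq : Assembler::relaxed,
                /*release*/ Assembler::rl,   // always rl, per the note above
                res);
}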
++instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeB" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeS" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeI" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeL" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeN" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ 
/*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeP" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 5); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeBAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeSAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ /*result_as_bool*/ false, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeIAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + 
STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeLAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangeNAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (CompareAndExchangeP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST); ++ ++ effect(TEMP_DEF res); ++ ++ format %{ ++ "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval, #@compareAndExchangePAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapB" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapS" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, 
$tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapI" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapL" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapN" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapP" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapB mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 6); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapBAcq" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int8, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, 
$tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI_R12 oldval, iRegI_R13 newval, ++ iRegI tmp1, iRegI tmp2, iRegI tmp3, rFlagsReg cr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapS mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 7); ++ ++ effect(TEMP_DEF res, KILL cr, USE_KILL oldval, USE_KILL newval, TEMP tmp1, TEMP tmp2, TEMP tmp3); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapSAcq" ++ %} ++ ++ ins_encode %{ ++ __ weak_cmpxchg_narrow_value(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int16, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapI mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapIAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapL mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapLAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 1 when success, #@weakCompareAndSwapNAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::uint32, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); ++ ++ ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 2); ++ ++ format %{ ++ "cmpxchg_weak_acq $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval\n\t" ++ "# $res == 
1 when success, #@weakCompareAndSwapPAcq" ++ %} ++ ++ ins_encode %{ ++ __ cmpxchg_weak(as_Register($mem$$base), $oldval$$Register, $newval$$Register, Assembler::int64, ++ /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetI mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw $prev, $newv, [$mem]\t#@get_and_setI" %} ++ ++ ins_encode %{ ++ __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setL" %} ++ ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ match(Set prev (GetAndSetN mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu $prev, $newv, [$mem]\t#@get_and_setN" %} ++ ++ ins_encode %{ ++ __ atomic_xchgwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg $prev, $newv, [$mem]\t#@get_and_setP" %} ++ ++ ins_encode %{ ++ __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set prev (GetAndSetI mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgw_acq $prev, $newv, [$mem]\t#@get_and_setIAcq" %} ++ ++ ins_encode %{ ++ __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set prev (GetAndSetL mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setLAcq" %} ++ ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set prev (GetAndSetN mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchgwu_acq $prev, $newv, [$mem]\t#@get_and_setNAcq" %} ++ ++ ins_encode %{ ++ __ atomic_xchgalwu($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set prev (GetAndSetP mem newv)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "atomic_xchg_acq $prev, $newv, [$mem]\t#@get_and_setPAcq" %} ++ ++ ins_encode %{ ++ __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], 
$incr\t#@get_and_addL" %} ++ ++ ins_encode %{ ++ __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addL_no_res" %} ++ ++ ins_encode %{ ++ __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL $newval, [$mem], $incr\t#@get_and_addLi" %} ++ ++ ins_encode %{ ++ __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL [$mem], $incr\t#@get_and_addLi_no_res" %} ++ ++ ins_encode %{ ++ __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addI" %} ++ ++ ins_encode %{ ++ __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addI_no_res" %} ++ ++ ins_encode %{ ++ __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI $newval, [$mem], $incr\t#@get_and_addIi" %} ++ ++ ins_encode %{ ++ __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI [$mem], $incr\t#@get_and_addIi_no_res" %} ++ ++ ins_encode %{ ++ __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ 
format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addL_no_resAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAdd incr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set newval (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq $newval, [$mem], $incr\t#@get_and_addLiAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ ++ match(Set dummy (GetAndAddL mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addL_acq [$mem], $incr\t#@get_and_addLi_no_resAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addI_no_resAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAdd incr) ++%{ ++ predicate(needs_acquiring_load_exclusive(n)); ++ ++ match(Set newval (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq $newval, [$mem], $incr\t#@get_and_addIiAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAdd incr) ++%{ ++ predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n)); ++ ++ match(Set dummy (GetAndAddI mem incr)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "get_and_addI_acq [$mem], $incr\t#@get_and_addIi_no_resAcq" %} ++ ++ ins_encode %{ ++ __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base)); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// ============================================================================ ++// Arithmetic Instructions ++// ++ ++// Integer Addition ++ ++// TODO ++// these currently employ operations which do not set CR and hence are ++// not flagged as killing CR but we would like to isolate the cases ++// where we want to set flags from those where we don't. need to work ++// out how to do that. 
++instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src1, $src2\t#@addI_reg_reg" %} ++ ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAdd src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm" %} ++ ++ ins_encode %{ ++ int32_t con = (int32_t)$src2$$constant; ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++instruct addI_reg_imm_l2i(iRegINoSp dst, iRegL src1, immIAdd src2) %{ ++ match(Set dst (AddI (ConvL2I src1) src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, $src2\t#@addI_reg_imm_l2i" %} ++ ++ ins_encode %{ ++ __ addiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Pointer Addition ++instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t# ptr, #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// If we shift more than 32 bits, we need not convert I2L. ++instruct lShiftL_regI_immGE32(iRegLNoSp dst, iRegI src, uimmI6_ge32 scale) %{ ++ match(Set dst (LShiftL (ConvI2L src) scale)); ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src, $scale & 63\t#@lShiftL_regI_immGE32" %} ++ ++ ins_encode %{ ++ __ slli(as_Register($dst$$reg), as_Register($src$$reg), $scale$$constant & 63); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Pointer Immediate Addition ++// n.b. this needs to be more expensive than using an indirect memory ++// operand ++instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAdd src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t# ptr, #@addP_reg_imm" %} ++ ++ ins_encode %{ ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Long Addition ++instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "add $dst, $src1, $src2\t#@addL_reg_reg" %} ++ ++ ins_encode %{ ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// No constant pool entries requiredLong Immediate Addition. 
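The reason no constant-pool entry is needed for the immediate-addition rule that follows (and for the immediate-subtraction rule further down) is that the immIAdd/immLAdd operands are restricted, roughly, to constants that fit the 12-bit signed immediate field of addi/addiw. A small C++ check expressing that basic range; the actual operand predicates in this port may accept a slightly different set (for example to cover negated values used by subtraction), so treat this as an assumption-level sketch:

// Basic property behind immIAdd/immLAdd (sketch, see caveat above):
// RISC-V addi/addiw encode a 12-bit signed immediate, i.e. [-2048, 2047].
#include <cstdint>
static inline bool fits_addi_immediate(int64_t v) {
  return v >= -2048 && v <= 2047;
}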
++instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, $src2\t#@addL_reg_imm" %} ++ ++ ins_encode %{ ++ // src2 is imm, so actually call the addi ++ __ add(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Integer Subtraction ++instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, $src1, $src2\t#@subI_reg_reg" %} ++ ++ ins_encode %{ ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate Subtraction ++instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immISub src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addiw $dst, $src1, -$src2\t#@subI_reg_imm" %} ++ ++ ins_encode %{ ++ // src2 is imm, so actually call the addiw ++ __ subw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Long Subtraction ++instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sub $dst, $src1, $src2\t#@subL_reg_reg" %} ++ ++ ins_encode %{ ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// No constant pool entries requiredLong Immediate Subtraction. ++instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLSub src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "addi $dst, $src1, -$src2\t#@subL_reg_imm" %} ++ ++ ins_encode %{ ++ // src2 is imm, so actually call the addi ++ __ sub(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ $src2$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Integer Negation (special case for sub) ++ ++instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(ALU_COST); ++ format %{ "subw $dst, x0, $src\t# int, #@negI_reg" %} ++ ++ ins_encode %{ ++ // actually call the subw ++ __ negw(as_Register($dst$$reg), ++ as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Long Negation ++ ++instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(ALU_COST); ++ format %{ "sub $dst, x0, $src\t# long, #@negL_reg" %} ++ ++ ins_encode %{ ++ // actually call the sub ++ __ neg(as_Register($dst$$reg), ++ as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Integer Multiply ++ ++instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulw $dst, $src1, $src2\t#@mulI" %} ++ ++ //this means 2 word multi, and no sign extend to 64 bits ++ ins_encode %{ ++ // riscv64 mulw will sign-extension to high 32 bits in dst reg ++ __ mulw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(imul_reg_reg); ++%} ++ ++// Long Multiply ++ ++instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mul $dst, $src1, $src2\t#@mulL" %} ++ ++ ins_encode %{ ++ __ mul(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(lmul_reg_reg); ++%} ++ ++instruct 
mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2) ++%{ ++ match(Set dst (MulHiL src1 src2)); ++ ins_cost(IMUL_COST); ++ format %{ "mulh $dst, $src1, $src2\t# mulhi, #@mulHiL_rReg" %} ++ ++ ins_encode %{ ++ __ mulh(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(lmul_reg_reg); ++%} ++ ++// Integer Divide ++ ++instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "divw $dst, $src1, $src2\t#@divI"%} ++ ++ ins_encode(riscv_enc_divw(dst, src1, src2)); ++ ins_pipe(idiv_reg_reg); ++%} ++ ++instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{ ++ match(Set dst (URShiftI (RShiftI src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, $div1\t# int signExtract, #@signExtract" %} ++ ++ ins_encode %{ ++ __ srliw(as_Register($dst$$reg), as_Register($src1$$reg), 31); ++ %} ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Long Divide ++ ++instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "div $dst, $src1, $src2\t#@divL" %} ++ ++ ins_encode(riscv_enc_div(dst, src1, src2)); ++ ins_pipe(ldiv_reg_reg); ++%} ++ ++instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{ ++ match(Set dst (URShiftL (RShiftL src1 div1) div2)); ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, $div1\t# long signExtract, #@signExtractL" %} ++ ++ ins_encode %{ ++ __ srli(as_Register($dst$$reg), as_Register($src1$$reg), 63); ++ %} ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Integer Remainder ++ ++instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(IDIVSI_COST); ++ format %{ "remw $dst, $src1, $src2\t#@modI" %} ++ ++ ins_encode(riscv_enc_modw(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Long Remainder ++ ++instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ ins_cost(IDIVDI_COST); ++ format %{ "rem $dst, $src1, $src2\t#@modL" %} ++ ++ ins_encode(riscv_enc_mod(dst, src1, src2)); ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Integer Shifts ++ ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sllw $dst, $src1, $src2\t#@lShiftI_reg_reg" %} ++ ++ ins_encode %{ ++ __ sllw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Left Immediate ++instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (LShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "slliw $dst, $src1, ($src2 & 0x1f)\t#@lShiftI_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 5 bits of the I-immediate field for RV32I ++ __ slliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srlw $dst, $src1, $src2\t#@urShiftI_reg_reg" %} ++ ++ ins_encode %{ ++ __ 
srlw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Right Logical Immediate ++instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (URShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "srliw $dst, $src1, ($src2 & 0x1f)\t#@urShiftI_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srliw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraw $dst, $src1, $src2\t#@rShiftI_reg_reg" %} ++ ++ ins_encode %{ ++ // riscv will sign-ext dst high 32 bits ++ __ sraw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Right Arithmetic Immediate ++instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{ ++ match(Set dst (RShiftI src1 src2)); ++ ins_cost(ALU_COST); ++ format %{ "sraiw $dst, $src1, ($src2 & 0x1f)\t#@rShiftI_reg_imm" %} ++ ++ ins_encode %{ ++ // riscv will sign-ext dst high 32 bits ++ __ sraiw(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x1f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Long Shifts ++ ++// Shift Left Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (LShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "sll $dst, $src1, $src2\t#@lShiftL_reg_reg" %} ++ ++ ins_encode %{ ++ __ sll(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Left Immediate ++instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (LShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "slli $dst, $src1, ($src2 & 0x3f)\t#@lShiftL_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ slli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Shift Right Logical Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (URShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "srl $dst, $src1, $src2\t#@urShiftL_reg_reg" %} ++ ++ ins_encode %{ ++ __ srl(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Right Logical Immediate ++instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (URShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, $src1, ($src2 & 0x3f)\t#@urShiftL_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} ++ ++ 
ins_pipe(ialu_reg_shift); ++%} ++ ++// A special-case pattern for card table stores. ++instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{ ++ match(Set dst (URShiftL (CastP2X src1) src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "srli $dst, p2x($src1), ($src2 & 0x3f)\t#@urShiftP_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srli(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// Shift Right Arithmetic Register ++// In RV64I, only the low 5 bits of src2 are considered for the shift amount ++instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{ ++ match(Set dst (RShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "sra $dst, $src1, $src2\t#@rShiftL_reg_reg" %} ++ ++ ins_encode %{ ++ __ sra(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg_vshift); ++%} ++ ++// Shift Right Arithmetic Immediate ++instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ ++ match(Set dst (RShiftL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "srai $dst, $src1, ($src2 & 0x3f)\t#@rShiftL_reg_imm" %} ++ ++ ins_encode %{ ++ // the shift amount is encoded in the lower ++ // 6 bits of the I-immediate field for RV64I ++ __ srai(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (unsigned) $src2$$constant & 0x3f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++instruct regI_not_reg(iRegINoSp dst, iRegI src1, immI_M1 m1) %{ ++ match(Set dst (XorI src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regI_not_reg" %} ++ ++ ins_encode %{ ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct regL_not_reg(iRegLNoSp dst, iRegL src1, immL_M1 m1) %{ ++ match(Set dst (XorL src1 m1)); ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, -1\t#@regL_not_reg" %} ++ ++ ins_encode %{ ++ __ xori(as_Register($dst$$reg), as_Register($src1$$reg), -1); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++ ++// ============================================================================ ++// Floating Point Arithmetic Instructions ++ ++instruct addF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fadd.s $dst, $src1, $src2\t#@addF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_s); ++%} ++ ++instruct addD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fadd.d $dst, $src1, $src2\t#@addD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_d); ++%} ++ ++instruct subF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fsub.s $dst, $src1, $src2\t#@subF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_s); ++%} ++ ++instruct subD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fsub.d $dst, 
$src1, $src2\t#@subD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_d); ++%} ++ ++instruct mulF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmul.s $dst, $src1, $src2\t#@mulF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmul_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_s); ++%} ++ ++instruct mulD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmul.d $dst, $src1, $src2\t#@mulD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmul_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_d); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmadd.s $dst, $src1, $src2, $src3\t#@maddF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmadd.d $dst, $src1, $src2, $src3\t#@maddD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fmsub.s $dst, $src1, $src2, $src3\t#@msubF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fmsub.d $dst, $src1, $src2, $src3\t#@msubD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsubF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmsub.s $dst, $src1, $src2, $src3\t#@nmsubF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fnmsub_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// -src1 * src2 + src3 ++instruct nmsubD_reg_reg(fRegD dst, fRegD src1, fRegD src2, 
fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmsub.d $dst, $src1, $src2, $src3\t#@nmsubD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fnmsub_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmaddF_reg_reg(fRegF dst, fRegF src1, fRegF src2, fRegF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ ins_cost(FMUL_SINGLE_COST); ++ format %{ "fnmadd.s $dst, $src1, $src2, $src3\t#@nmaddF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fnmadd_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// -src1 * src2 - src3 ++instruct nmaddD_reg_reg(fRegD dst, fRegD src1, fRegD src2, fRegD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ ins_cost(FMUL_DOUBLE_COST); ++ format %{ "fnmadd.d $dst, $src1, $src2, $src3\t#@nmaddD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fnmadd_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), ++ as_FloatRegister($src3$$reg)); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++// Math.max(FF)F ++instruct maxF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MaxF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); ++ format %{ "fsflags zr\t#@maxF_reg_reg\n\t" ++ "fmax.s $dst, $src1, $src2\n\t" ++ "flt.s zr, $src1, $src2\n\t" ++ "frflags t0\n\t" ++ "beqz t0, Ldone\n\t" ++ "fadd.s $dst, $src1, $src2" %} ++ ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ false); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_s); ++%} ++ ++// Math.min(FF)F ++instruct minF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (MinF src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_SINGLE_COST + BRANCH_COST); ++ format %{ "fsflags zr\t#@minF_reg_reg\n\t" ++ "fmin.s $dst, $src1, $src2\n\t" ++ "flt.s zr, $src1, $src2\n\t" ++ "frflags t0\n\t" ++ "beqz t0, Ldone\n\t" ++ "fadd.s $dst, $src1, $src2" %} ++ ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), /* is_double */ false, /* is_min */ true); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_s); ++%} ++ ++// Math.max(DD)D ++instruct maxD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MaxD src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); ++ format %{ "fsflags zr\t#@maxD_reg_reg\n\t" ++ "fmax.d $dst, $src1, $src2\n\t" ++ "flt.d zr, $src1, $src2\n\t" ++ "frflags t0\n\t" ++ "beqz t0, Ldone\n\t" ++ "fadd.d $dst, $src1, $src2" %} ++ ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ false); ++ %} ++ ++ 
ins_pipe(fp_dop_reg_reg_d); ++%} ++ ++// Math.min(DD)D ++instruct minD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (MinD src1 src2)); ++ effect(TEMP_DEF dst, USE src1, USE src2); ++ ++ ins_cost(2 * DEFAULT_COST + 2 * XFER_COST + FMUL_DOUBLE_COST + BRANCH_COST); ++ format %{ "fsflags zr\t#@minD_reg_reg\n\t" ++ "fmin.d $dst, $src1, $src2\n\t" ++ "flt.d zr, $src1, $src2\n\t" ++ "frflags t0\n\t" ++ "beqz t0, Ldone\n\t" ++ "fadd.d $dst, $src1, $src2" %} ++ ++ ins_encode %{ ++ __ minmax_FD(as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg), /* is_double */ true, /* is_min */ true); ++ %} ++ ++ ins_pipe(fp_dop_reg_reg_d); ++%} ++ ++instruct divF_reg_reg(fRegF dst, fRegF src1, fRegF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.s $dst, $src1, $src2\t#@divF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fdiv_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_div_s); ++%} ++ ++instruct divD_reg_reg(fRegD dst, fRegD src1, fRegD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(FDIV_COST); ++ format %{ "fdiv.d $dst, $src1, $src2\t#@divD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fdiv_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ++ ins_pipe(fp_div_d); ++%} ++ ++instruct negF_reg_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (NegF src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.s $dst, $src, $src\t#@negF_reg_reg" %} ++ ++ ins_encode %{ ++ __ fneg_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_uop_s); ++%} ++ ++instruct negD_reg_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (NegD src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjn.d $dst, $src, $src\t#@negD_reg_reg" %} ++ ++ ins_encode %{ ++ __ fneg_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_uop_d); ++%} ++ ++instruct absI_reg(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (AbsI src)); ++ ++ ins_cost(ALU_COST * 3); ++ format %{ "sraiw t0, $src, 0x1f\n\t" ++ "xorr $dst, $src, t0\n\t" ++ "subw $dst, $dst, t0\t#@absI_reg" %} ++ ++ ins_encode %{ ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct absI2L_reg(iRegLNoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2L (AbsI src))); ++ ++ ins_cost(ALU_COST * 3); ++ format %{ "sraiw t0, $src, 0x1f\n\t" ++ "xorr $dst, $src, t0\n\t" ++ "subw $dst, $dst, t0\t#@absI2L_reg" %} ++ ++ ins_encode %{ ++ __ sraiw(t0, as_Register($src$$reg), 0x1f); ++ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ subw(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct absL_reg(iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (AbsL src)); ++ ++ ins_cost(ALU_COST * 3); ++ format %{ "srai t0, $src, 0x3f\n\t" ++ "xorr $dst, $src, t0\n\t" ++ "sub $dst, $dst, t0\t#absL_reg" %} ++ ++ ins_encode %{ ++ __ srai(t0, as_Register($src$$reg), 0x3f); ++ __ xorr(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ __ sub(as_Register($dst$$reg), as_Register($dst$$reg), t0); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct absF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (AbsF src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.s $dst, $src, $src\t#@absF_reg" %} ++ 
ins_encode %{ ++ __ fabs_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_uop_s); ++%} ++ ++instruct absD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (AbsD src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fsgnjx.d $dst, $src, $src\t#@absD_reg" %} ++ ins_encode %{ ++ __ fabs_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_uop_d); ++%} ++ ++instruct sqrtF_reg(fRegF dst, fRegF src) %{ ++ match(Set dst (SqrtF src)); ++ ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.s $dst, $src\t#@sqrtF_reg" %} ++ ins_encode %{ ++ __ fsqrt_s(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_sqrt_s); ++%} ++ ++instruct sqrtD_reg(fRegD dst, fRegD src) %{ ++ match(Set dst (SqrtD src)); ++ ++ ins_cost(FSQRT_COST); ++ format %{ "fsqrt.d $dst, $src\t#@sqrtD_reg" %} ++ ins_encode %{ ++ __ fsqrt_d(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_sqrt_d); ++%} ++ ++// Arithmetic Instructions End ++ ++// ============================================================================ ++// Logical Instructions ++ ++// Register And ++instruct andI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "andr $dst, $src1, $src2\t#@andI_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate And ++instruct andI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "andi $dst, $src1, $src2\t#@andI_reg_imm" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Register Or ++instruct orI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "orr $dst, $src1, $src2\t#@orI_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate Or ++instruct orI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "ori $dst, $src1, $src2\t#@orI_reg_imm" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Register Xor ++instruct xorI_reg_reg(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "xorr $dst, $src1, $src2\t#@xorI_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate Xor ++instruct xorI_reg_imm(iRegINoSp dst, iRegI src1, immIAdd src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "xori $dst, $src1, $src2\t#@xorI_reg_imm" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Register And Long ++instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ++ format %{ "andr $dst, $src1, $src2\t#@andL_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ 
andr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate And Long ++instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ++ format %{ "andi $dst, $src1, $src2\t#@andL_reg_imm" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ andi(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Register Or Long ++instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ ++ format %{ "orr $dst, $src1, $src2\t#@orL_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ orr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate Or Long ++instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (OrL src1 src2)); ++ ++ format %{ "ori $dst, $src1, $src2\t#@orL_reg_imm" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ ori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// Register Xor Long ++instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ++ format %{ "xorr $dst, $src1, $src2\t#@xorL_reg_reg" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ xorr(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Immediate Xor Long ++instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLAdd src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "xori $dst, $src1, $src2\t#@xorL_reg_imm" %} ++ ++ ins_encode %{ ++ __ xori(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ (int32_t)($src2$$constant)); ++ %} ++ ++ ins_pipe(ialu_reg_imm); ++%} ++ ++// ============================================================================ ++// BSWAP Instructions ++ ++instruct bytes_reverse_int(rFlagsReg cr, iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesI src)); ++ effect(TEMP cr); ++ ++ ins_cost(ALU_COST * 13); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int" %} ++ ++ ins_encode %{ ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_long(rFlagsReg cr, iRegLNoSp dst, iRegL src) %{ ++ match(Set dst (ReverseBytesL src)); ++ effect(TEMP cr); ++ ++ ins_cost(ALU_COST * 29); ++ format %{ "revb $dst, $src\t#@bytes_reverse_long" %} ++ ++ ins_encode %{ ++ __ revb(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesUS src)); ++ ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short" %} ++ ++ ins_encode %{ ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{ ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 5); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short" %} ++ ++ ins_encode %{ ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// ============================================================================ ++// MemBar Instruction ++ ++instruct load_fence() 
%{ ++ match(LoadFence); ++ ins_cost(ALU_COST); ++ ++ format %{ "#@load_fence" %} ++ ++ ins_encode %{ ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ %} ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(ALU_COST); ++ ++ format %{ "#@membar_acquire\n\t" ++ "fence ir iorw" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire"); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ format %{ "#@membar_acquire_lock (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_acquire_lock (elided)"); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(ALU_COST); ++ ++ format %{ "#@store_fence" %} ++ ++ ins_encode %{ ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ %} ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(ALU_COST); ++ ++ format %{ "#@membar_release\n\t" ++ "fence iorw ow" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release"); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ %} ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ins_cost(ALU_COST); ++ ++ format %{ "MEMBAR-store-store\t#@membar_storestore" %} ++ ++ ins_encode %{ ++ __ membar(MacroAssembler::StoreStore); ++ %} ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ format %{ "#@membar_release_lock (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release_lock (elided)"); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(ALU_COST); ++ ++ format %{ "#@membar_volatile\n\t" ++ "fence iorw iorw"%} ++ ++ ins_encode %{ ++ __ block_comment("membar_volatile"); ++ __ membar(MacroAssembler::StoreLoad); ++ %} ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// ============================================================================ ++// Cast Instructions (Java-level type cast) ++ ++instruct castX2P(iRegPNoSp dst, iRegL src) %{ ++ match(Set dst (CastX2P src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# long -> ptr, #@castX2P" %} ++ ++ ins_encode %{ ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct castP2X(iRegLNoSp dst, iRegP src) %{ ++ match(Set dst (CastP2X src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# ptr -> long, #@castP2X" %} ++ ++ ins_encode %{ ++ if ($dst$$reg != $src$$reg) { ++ __ mv(as_Register($dst$$reg), as_Register($src$$reg)); ++ } ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct castPP(iRegPNoSp dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "# castPP of $dst, #@castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} ++ ++instruct castII(iRegI dst) ++%{ ++ match(Set dst (CastII dst)); ++ ++ size(0); ++ format %{ "# castII of $dst, #@castII" %} ++ ins_encode(/* empty encoding */); ++ ins_cost(0); ++ ins_pipe(pipe_class_empty); ++%} ++ ++instruct checkCastPP(iRegPNoSp dst) ++%{ ++ match(Set dst (CheckCastPP dst)); ++ ++ size(0); ++ ins_cost(0); ++ format %{ "# checkcastPP of $dst, #@checkCastPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(pipe_class_empty); ++%} ++ 
++// ============================================================================ ++// Convert Instructions ++ ++// int to bool ++instruct convI2Bool(iRegINoSp dst, iRegI src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convI2Bool" %} ++ ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// pointer to bool ++instruct convP2Bool(iRegINoSp dst, iRegP src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "snez $dst, $src\t#@convP2Bool" %} ++ ++ ins_encode %{ ++ __ snez(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// int <-> long ++ ++instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src) ++%{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convI2L_reg_reg" %} ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} ++ ins_pipe(ialu_reg); ++%} ++ ++instruct convL2I_reg(iRegINoSp dst, iRegL src) %{ ++ match(Set dst (ConvL2I src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "addw $dst, $src, zr\t#@convL2I_reg" %} ++ ++ ins_encode %{ ++ __ addw(as_Register($dst$$reg), as_Register($src$$reg), zr); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// int to unsigned long (Zero-extend) ++instruct convI2UL_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# i2ul, #@convI2UL_reg_reg" %} ++ ++ ins_encode %{ ++ __ zero_extend(as_Register($dst$$reg), as_Register($src$$reg), 32); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// float <-> double ++ ++instruct convF2D_reg(fRegD dst, fRegF src) %{ ++ match(Set dst (ConvF2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.s $dst, $src\t#@convF2D_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_d_s(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_f2d); ++%} ++ ++instruct convD2F_reg(fRegF dst, fRegD src) %{ ++ match(Set dst (ConvD2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.d $dst, $src\t#@convD2F_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_s_d(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2f); ++%} ++ ++// float <-> int ++ ++instruct convF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2I src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.s $dst, $src\t#@convF2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_w_s_safe($dst$$Register, $src$$FloatRegister); ++ %} ++ ++ ins_pipe(fp_f2i); ++%} ++ ++instruct convI2F_reg_reg(fRegF dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.w $dst, $src\t#@convI2F_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_s_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_i2f); ++%} ++ ++// float <-> long ++ ++instruct convF2L_reg_reg(iRegLNoSp dst, fRegF src) %{ ++ match(Set dst (ConvF2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.s $dst, $src\t#@convF2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_l_s_safe($dst$$Register, $src$$FloatRegister); ++ %} ++ ++ ins_pipe(fp_f2l); ++%} ++ ++instruct convL2F_reg_reg(fRegF dst, iRegL src) %{ ++ match(Set dst (ConvL2F src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.s.l $dst, $src\t#@convL2F_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_s_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_l2f); ++%} ++ 
++// double <-> int ++ ++instruct convD2I_reg_reg(iRegINoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.w.d $dst, $src\t#@convD2I_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_w_d_safe($dst$$Register, $src$$FloatRegister); ++ %} ++ ++ ins_pipe(fp_d2i); ++%} ++ ++instruct convI2D_reg_reg(fRegD dst, iRegIorL2I src) %{ ++ match(Set dst (ConvI2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.w $dst, $src\t#@convI2D_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_d_w(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_i2d); ++%} ++ ++// double <-> long ++ ++instruct convD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ match(Set dst (ConvD2L src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.l.d $dst, $src\t#@convD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_l_d_safe($dst$$Register, $src$$FloatRegister); ++ %} ++ ++ ins_pipe(fp_d2l); ++%} ++ ++instruct convL2D_reg_reg(fRegD dst, iRegL src) %{ ++ match(Set dst (ConvL2D src)); ++ ++ ins_cost(XFER_COST); ++ format %{ "fcvt.d.l $dst, $src\t#@convL2D_reg_reg" %} ++ ++ ins_encode %{ ++ __ fcvt_d_l(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_l2d); ++%} ++ ++// Convert oop into int for vectors alignment masking ++instruct convP2I(iRegINoSp dst, iRegP src) %{ ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "zero_extend $dst, $src, 32\t# ptr -> int, #@convP2I" %} ++ ++ ins_encode %{ ++ __ zero_extend($dst$$Register, $src$$Register, 32); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(iRegINoSp dst, iRegN src) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ ++ ins_cost(ALU_COST); ++ format %{ "mv $dst, $src\t# compressed ptr -> int, #@convN2I" %} ++ ++ ins_encode %{ ++ __ mv($dst$$Register, $src$$Register); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodeP src)); ++ ins_cost(ALU_COST); ++ format %{ "encode_heap_oop $dst, $src\t#@encodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ encode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} ++ ++instruct decodeHeapOop(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ ++ ins_cost(0); ++ format %{ "decode_heap_oop $dst, $src\t#@decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} ++ ++instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ ++ ins_cost(0); ++ format %{ "decode_heap_oop_not_null $dst, $src\t#@decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ __ decode_heap_oop_not_null(d, s); ++ %} ++ ins_pipe(ialu_reg); ++%} ++ ++// Convert klass pointer into compressed form. 
++instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "encode_klass_not_null $dst, $src\t#@encodeKlass_not_null" %} ++ ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ encode_klass_not_null(dst_reg, src_reg, t0); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{ ++ predicate(!maybe_use_tmp_register_decoding_klass()); ++ ++ match(Set dst (DecodeNKlass src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, UseCompressedOops ? xheapbase : t0); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct decodeKlass_not_null_with_tmp(iRegPNoSp dst, iRegN src, rFlagsReg tmp) %{ ++ predicate(maybe_use_tmp_register_decoding_klass()); ++ ++ match(Set dst (DecodeNKlass src)); ++ ++ effect(TEMP tmp); ++ ++ ins_cost(ALU_COST); ++ format %{ "decode_klass_not_null $dst, $src\t#@decodeKlass_not_null" %} ++ ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ Register tmp_reg = as_Register($tmp$$reg); ++ __ decode_klass_not_null(dst_reg, src_reg, tmp_reg); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// stack <-> reg and reg <-> reg shuffles with no conversion ++ ++instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "lw $dst, $src\t#@MoveF2I_stack_reg" %} ++ ++ ins_encode %{ ++ __ lw(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_reg); ++ ++%} ++ ++instruct MoveI2F_stack_reg(fRegF dst, stackSlotI src) %{ ++ ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "flw $dst, $src\t#@MoveI2F_stack_reg" %} ++ ++ ins_encode %{ ++ __ flw(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++ ++%} ++ ++instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{ ++ ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "ld $dst, $src\t#@MoveD2L_stack_reg" %} ++ ++ ins_encode %{ ++ __ ld(as_Register($dst$$reg), Address(sp, $src$$disp)); ++ %} ++ ++ ins_pipe(iload_reg_reg); ++ ++%} ++ ++instruct MoveL2D_stack_reg(fRegD dst, stackSlotL src) %{ ++ ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(LOAD_COST); ++ ++ format %{ "fld $dst, $src\t#@MoveL2D_stack_reg" %} ++ ++ ins_encode %{ ++ __ fld(as_FloatRegister($dst$$reg), Address(sp, $src$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++ ++%} ++ ++instruct MoveF2I_reg_stack(stackSlotI dst, fRegF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsw $src, $dst\t#@MoveF2I_reg_stack" %} ++ ++ ins_encode %{ ++ __ fsw(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++ ++%} ++ ++instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{ ++ ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sw $src, $dst\t#@MoveI2F_reg_stack" %} ++ ++ ins_encode %{ ++ __ sw(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ 
ins_pipe(istore_reg_reg); ++ ++%} ++ ++instruct MoveD2L_reg_stack(stackSlotL dst, fRegD src) %{ ++ ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "fsd $dst, $src\t#@MoveD2L_reg_stack" %} ++ ++ ins_encode %{ ++ __ fsd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++ ++%} ++ ++instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{ ++ ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(STORE_COST); ++ ++ format %{ "sd $src, $dst\t#@MoveL2D_reg_stack" %} ++ ++ ins_encode %{ ++ __ sd(as_Register($src$$reg), Address(sp, $dst$$disp)); ++ %} ++ ++ ins_pipe(istore_reg_reg); ++ ++%} ++ ++instruct MoveF2I_reg_reg(iRegINoSp dst, fRegF src) %{ ++ ++ match(Set dst (MoveF2I src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.w $dst, $src\t#@MoveL2D_reg_stack" %} ++ ++ ins_encode %{ ++ __ fmv_x_w(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_f2i); ++ ++%} ++ ++instruct MoveI2F_reg_reg(fRegF dst, iRegI src) %{ ++ ++ match(Set dst (MoveI2F src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.w.x $dst, $src\t#@MoveI2F_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_w_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_i2f); ++ ++%} ++ ++instruct MoveD2L_reg_reg(iRegLNoSp dst, fRegD src) %{ ++ ++ match(Set dst (MoveD2L src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.x.d $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_x_d(as_Register($dst$$reg), as_FloatRegister($src$$reg)); ++ %} ++ ++ ins_pipe(fp_d2l); ++ ++%} ++ ++instruct MoveL2D_reg_reg(fRegD dst, iRegL src) %{ ++ ++ match(Set dst (MoveL2D src)); ++ ++ effect(DEF dst, USE src); ++ ++ ins_cost(XFER_COST); ++ ++ format %{ "fmv.d.x $dst, $src\t#@MoveD2L_reg_reg" %} ++ ++ ins_encode %{ ++ __ fmv_d_x(as_FloatRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(fp_l2d); ++%} ++ ++// ============================================================================ ++// Compare Instructions which set the result float comparisons in dest register. ++ ++instruct cmpF3_reg_reg(iRegINoSp dst, fRegF op1, fRegF op2) ++%{ ++ match(Set dst (CmpF3 op1 op2)); ++ ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.s $dst, $op2, $op1\t#@cmpF3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.s $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpF3_reg_reg" ++ %} ++ ++ ins_encode %{ ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. ++ __ float_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++instruct cmpD3_reg_reg(iRegINoSp dst, fRegD op1, fRegD op2) ++%{ ++ match(Set dst (CmpD3 op1 op2)); ++ ++ ins_cost(XFER_COST * 2 + BRANCH_COST + ALU_COST); ++ format %{ "flt.d $dst, $op2, $op1\t#@cmpD3_reg_reg\n\t" ++ "bgtz $dst, done\n\t" ++ "feq.d $dst, $op1, $op2\n\t" ++ "addi $dst, $dst, -1\t#@cmpD3_reg_reg" ++ %} ++ ++ ins_encode %{ ++ // we want -1 for unordered or less than, 0 for equal and 1 for greater than. 
++ __ double_compare(as_Register($dst$$reg), as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), -1 /*unordered_result < 0*/); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++instruct cmpL3_reg_reg(iRegINoSp dst, iRegL op1, iRegL op2) ++%{ ++ match(Set dst (CmpL3 op1 op2)); ++ ++ ins_cost(ALU_COST * 3 + BRANCH_COST); ++ format %{ "slt $dst, $op2, $op1\t#@cmpL3_reg_reg\n\t" ++ "bnez $dst, done\n\t" ++ "slt $dst, $op1, $op2\n\t" ++ "neg $dst, $dst\t#@cmpL3_reg_reg" ++ %} ++ ins_encode %{ ++ __ cmp_l2i(t0, as_Register($op1$$reg), as_Register($op2$$reg)); ++ __ mv(as_Register($dst$$reg), t0); ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegI p, iRegI q) ++%{ ++ match(Set dst (CmpLTMask p q)); ++ ++ ins_cost(2 * ALU_COST); ++ ++ format %{ "slt $dst, $p, $q\t#@cmpLTMask_reg_reg\n\t" ++ "subw $dst, zr, $dst\t#@cmpLTMask_reg_reg" ++ %} ++ ++ ins_encode %{ ++ __ slt(as_Register($dst$$reg), as_Register($p$$reg), as_Register($q$$reg)); ++ __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I op, immI0 zero) ++%{ ++ match(Set dst (CmpLTMask op zero)); ++ ++ ins_cost(ALU_COST); ++ ++ format %{ "sraiw $dst, $dst, 31\t#@cmpLTMask_reg_reg" %} ++ ++ ins_encode %{ ++ __ sraiw(as_Register($dst$$reg), as_Register($op$$reg), 31); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++ ++// ============================================================================ ++// Max and Min ++ ++instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MinI src1 src2)); ++ ++ effect(DEF dst, USE src1, USE src2); ++ ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "ble $src1, $src2, Lsrc1.\t#@minI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@minI_rReg" ++ %} ++ ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ ble(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2) ++%{ ++ match(Set dst (MaxI src1 src2)); ++ ++ effect(DEF dst, USE src1, USE src2); ++ ++ ins_cost(BRANCH_COST + ALU_COST * 2); ++ format %{ ++ "bge $src1, $src2, Lsrc1\t#@maxI_rReg\n\t" ++ "mv $dst, $src2\n\t" ++ "j Ldone\n\t" ++ "bind Lsrc1\n\t" ++ "mv $dst, $src1\n\t" ++ "bind\t#@maxI_rReg" ++ %} ++ ++ ins_encode %{ ++ Label Lsrc1, Ldone; ++ __ bge(as_Register($src1$$reg), as_Register($src2$$reg), Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src2$$reg)); ++ __ j(Ldone); ++ __ bind(Lsrc1); ++ __ mv(as_Register($dst$$reg), as_Register($src1$$reg)); ++ __ bind(Ldone); ++ ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// ============================================================================ ++// Branch Instructions ++// Direct Branch. ++instruct branch(label lbl) ++%{ ++ match(Goto); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "j $lbl\t#@branch" %} ++ ++ ins_encode(riscv_enc_j(lbl)); ++ ++ ins_pipe(pipe_branch); ++%} ++ ++// ============================================================================ ++// Compare and Branch Instructions ++ ++// Patterns for short (< 12KiB) variants ++ ++// Compare flags and branch near instructions. 
++instruct cmpFlag_branch(cmpOpEqNe cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $cr, zr, $lbl\t#@cmpFlag_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label)); ++ %} ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare signed int and branch near instructions ++instruct cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_branch'. ++ match(If cmp (CmpI op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpI_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpI_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare unsigned int and branch near instructions ++instruct cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_branch'. ++ match(If cmp (CmpU op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) ++%{ ++ // Same match rule as `far_cmpU_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpU_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare signed long and branch near instructions ++instruct cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_branch'. ++ match(If cmp (CmpL op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpL_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpL_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare unsigned long and branch near instructions ++instruct cmpUL_branch(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_branch'. 
++ match(If cmp (CmpUL op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) ++%{ ++ // Same match rule as `far_cmpUL_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpUL_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare pointer and branch near instructions ++instruct cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_branch'. ++ match(If cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ // Same match rule as `far_cmpP_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpP_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare narrow pointer and branch near instructions ++instruct cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_branch'. ++ match(If cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ // Same match rule as `far_cmpN_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, $op2, $lbl\t#@cmpN_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare float and branch near instructions ++instruct cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_branch'. 
++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2 $lbl \t#@cmpF_branch"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); ++%} ++ ++instruct cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ // Same match rule as `far_cmpF_loop'. ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "float_b$cmp $op1, $op2, $lbl\t#@cmpF_loop"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); ++%} ++ ++// Compare double and branch near instructions ++instruct cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_branch'. ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_branch"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); ++%} ++ ++instruct cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ // Same match rule as `far_cmpD_loop'. ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST); ++ format %{ "double_b$cmp $op1, $op2, $lbl\t#@cmpD_loop"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++ ins_short_branch(1); ++%} ++ ++// Compare signed int with zero and branch near instructions ++instruct cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_branch'. ++ match(If cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpI_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpI_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare unsigned int with zero and branch near instructions ++instruct cmpUEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_branch'. 
++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpUEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpUEqNeLeGt_reg_imm0_loop" %} ++ ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare signed long with zero and branch near instructions ++instruct cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_branch'. ++ match(If cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpL_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpL_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare unsigned long with zero and branch near instructions ++instruct cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_branch'. ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ // Same match rule as `far_cmpULEqNeLeGt_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpULEqNeLeGt_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare pointer with zero and branch near instructions ++instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_branch'. 
++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare narrow pointer with zero and branch near instructions ++instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_branch'. ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ // Same match rule as `far_cmpN_reg_imm0_loop'. ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpN_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Compare narrow pointer with pointer zero and branch near instructions ++instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_branch'. ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++instruct cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ // Same match rule as `far_cmpP_narrowOop_imm0_loop'. 
++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "b$cmp $op1, zr, $lbl\t#@cmpP_narrowOop_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label)); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++ ins_short_branch(1); ++%} ++ ++// Patterns for far (20KiB) variants ++ ++instruct far_cmpFlag_branch(cmpOp cmp, rFlagsReg cr, label lbl) %{ ++ match(If cmp cr); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "far_b$cmp $cr, zr, $lbl\t#@far_cmpFlag_branch"%} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($cr$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++// Compare signed int and branch far instructions ++instruct far_cmpI_branch(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ // the format instruction [far_b$cmp] here is used as two instructions ++ // in macroassembler: b$not_cmp(op1, op2, done), j($lbl), bind(done) ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpI_loop(cmpOp cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpI op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpI_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpU_branch(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(If cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpU_loop(cmpOpU cmp, iRegI op1, iRegI op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpU op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpU_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpL_branch(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpL_loop(cmpOp cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpL_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpUL_branch(cmpOpU 
cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(If cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpUL_loop(cmpOpU cmp, iRegL op1, iRegL op2, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpUL op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpUL_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpP_branch(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(If cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpP_loop(cmpOpU cmp, iRegP op1, iRegP op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpP op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpP_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpN_branch(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(If cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++instruct far_cmpN_loop(cmpOpU cmp, iRegN op1, iRegN op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpN op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, $op2, $lbl\t#@far_cmpN_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode | MacroAssembler::unsigned_branch_mask, as_Register($op1$$reg), ++ as_Register($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmp_branch); ++%} ++ ++// Float compare and branch instructions ++instruct far_cmpF_branch(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(If cmp (CmpF op1 op2)); ++ ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_branch"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct far_cmpF_loop(cmpOp cmp, fRegF op1, fRegF op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpF op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_float_b$cmp $op1, $op2, $lbl\t#@far_cmpF_loop"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode, 
as_FloatRegister($op1$$reg), as_FloatRegister($op2$$reg), ++ *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} ++ ++// Double compare and branch instructions ++instruct far_cmpD_branch(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(If cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_branch"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct far_cmpD_loop(cmpOp cmp, fRegD op1, fRegD op2, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpD op1 op2)); ++ effect(USE lbl); ++ ++ ins_cost(XFER_COST + BRANCH_COST * 2); ++ format %{ "far_double_b$cmp $op1, $op2, $lbl\t#@far_cmpD_loop"%} ++ ++ ins_encode %{ ++ __ float_cmp_branch($cmp$$cmpcode | MacroAssembler::double_branch_mask, as_FloatRegister($op1$$reg), ++ as_FloatRegister($op2$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_class_compare); ++%} ++ ++instruct far_cmpI_reg_imm0_branch(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpI_reg_imm0_loop(cmpOp cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpI op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpI_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpUEqNeLeGt_imm0_branch(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpUEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpUEqNeLeGt_reg_imm0_loop" %} ++ ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(If cmp (CmpU op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegI op1, immI0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpU op1 zero)); ++ ++ 
effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpL_reg_imm0_branch(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpL_reg_imm0_loop(cmpOp cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpL_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ cmp_branch($cmp$$cmpcode, as_Register($op1$$reg), zr, *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULEqNeLeGt_reg_imm0_branch(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULEqNeLeGt_reg_imm0_loop(cmpOpUEqNeLeGt cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpULEqNeLeGt_reg_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpUEqNeLeGt_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++// compare lt/ge unsigned instructs has no short instruct with same match ++instruct far_cmpULLtGe_reg_imm0_branch(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(If cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_branch" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpULLtGe_reg_imm0_loop(cmpOpULtGe cmp, iRegL op1, immL0 zero, label lbl) ++%{ ++ match(CountedLoopEnd cmp (CmpUL op1 zero)); ++ ++ effect(USE op1, USE lbl); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j $lbl if $cmp == ge\t#@far_cmpULLtGe_reg_imm0_loop" %} ++ ++ ins_encode(riscv_enc_far_cmpULtGe_imm0_branch(cmp, op1, lbl)); ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_imm0_loop(cmpOpEqNe cmp, iRegP op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_imm0_loop" %} ++ ++ ins_encode %{ ++ __ 
enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(If cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpN_imm0_loop(cmpOpEqNe cmp, iRegN op1, immN0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpN op1 zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpN_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_branch" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++instruct far_cmpP_narrowOop_imm0_loop(cmpOpEqNe cmp, iRegN op1, immP0 zero, label lbl) %{ ++ match(CountedLoopEnd cmp (CmpP (DecodeN op1) zero)); ++ effect(USE lbl); ++ ++ ins_cost(BRANCH_COST * 2); ++ format %{ "far_b$cmp $op1, zr, $lbl\t#@far_cmpP_narrowOop_imm0_loop" %} ++ ++ ins_encode %{ ++ __ enc_cmpEqNe_imm0_branch($cmp$$cmpcode, as_Register($op1$$reg), *($lbl$$label), /* is_far */ true); ++ %} ++ ++ ins_pipe(pipe_cmpz_branch); ++%} ++ ++// ============================================================================ ++// Conditional Move Instructions ++instruct cmovI_cmpI(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpI\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpL(iRegINoSp dst, iRegI src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpU(iRegINoSp dst, iRegI src, iRegI op1, iRegI op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ format %{ "bneg$cop $op1, $op2, skip\t#@cmovI_cmpU\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpUL(iRegINoSp dst, iRegI src, iRegL 
op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ format %{ "bneg$cop $op1 $op2, skip\t#@cmovI_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpUL(iRegLNoSp dst, iRegL src, iRegL op1, iRegL op2, cmpOpU cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL op1 op2)) (Binary dst src))); ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "bneg$cop $op1, $op2, skip\t#@cmovL_cmpUL\n\t" ++ "mv $dst, $src\n\t" ++ "skip:" ++ %} ++ ++ ins_encode %{ ++ __ enc_cmove($cop$$cmpcode | MacroAssembler::unsigned_branch_mask, ++ as_Register($op1$$reg), as_Register($op2$$reg), ++ as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++ ++// Call Java Static Instruction ++ ++instruct CallStaticJavaDirect(method meth) ++%{ ++ match(CallStaticJava); ++ ++ effect(USE meth); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL,static $meth\t#@CallStaticJavaDirect" %} ++ ++ ins_encode( riscv_enc_java_static_call(meth), ++ riscv_enc_call_epilog ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// TO HERE ++ ++// Call Java Dynamic Instruction ++instruct CallDynamicJavaDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallDynamicJava); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST + ALU_COST * 6); ++ ++ format %{ "CALL,dynamic $meth\t#@CallDynamicJavaDirect" %} ++ ++ ins_encode( riscv_enc_java_dynamic_call(meth), ++ riscv_enc_call_epilog ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Call Runtime Instruction ++ ++instruct CallRuntimeDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallRuntime); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime $meth\t#@CallRuntimeDirect" %} ++ ++ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Call Runtime Instruction ++ ++instruct CallLeafDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeaf); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf $meth\t#@CallLeafDirect" %} ++ ++ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Call Runtime Instruction ++ ++instruct CallLeafNoFPDirect(method meth, rFlagsReg cr) ++%{ ++ match(CallLeafNoFP); ++ ++ effect(USE meth, KILL cr); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "CALL, runtime leaf nofp $meth\t#@CallLeafNoFPDirect" %} ++ ++ ins_encode( riscv_enc_java_to_runtime(meth) ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// ============================================================================ ++// Partial Subtype Check ++// ++// superklass array for an instance of 
the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). Return zero for a hit. The encoding ++// ALSO sets flags. ++ ++instruct partialSubtypeCheck(rFlagsReg cr, iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL temp, KILL cr); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super\t#@partialSubtypeCheck" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ++ opcode(0x1); // Force zero of result reg on hit ++ ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct partialSubtypeCheckVsZero(iRegP_R14 sub, iRegP_R10 super, iRegP_R12 temp, iRegP_R15 result, ++ immP0 zero, rFlagsReg cr) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL temp, KILL result); ++ ++ ins_cost(2 * STORE_COST + 3 * LOAD_COST + 4 * ALU_COST + BRANCH_COST * 4); ++ format %{ "partialSubtypeCheck $result, $sub, $super == 0\t#@partialSubtypeCheckVsZero" %} ++ ++ ins_encode(riscv_enc_partial_subtype_check(sub, super, temp, result)); ++ ++ opcode(0x0); // Don't zero result reg on hit ++ ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegP_R28 tmp1, iRegL_R29 tmp2, iRegL_R30 tmp3, ++ rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL tmp3, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_indexofUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct 
string_indexofLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_indexofUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, ++ iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_indexof_conUU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_indexof_conLL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_le_4 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, 
$result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_indexof_conUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, ++ immI_1 int_cnt2, iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof_linearscan($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct stringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, false /* isU */) ; ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++ ++instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch, ++ iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2, ++ iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg tmp) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP_DEF result, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL tmp); ++ ++ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, true /* isL */); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++// clearing of an array ++instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate(!UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ ++ ins_encode %{ ++ address tpc = __ zero_words($base$$Register, $cnt$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct clearArray_imm_reg(immL cnt, iRegP_R28 base, Universe dummy, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && (uint64_t)n->in(2)->get_long() ++ < (uint64_t)(BlockZeroingLowLimit >> LogBytesPerWord)); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL base, KILL cr); ++ ++ ins_cost(4 * DEFAULT_COST); ++ format %{ "ClearArray $cnt, $base\t#@clearArray_imm_reg" %} ++ ++ ins_encode %{ ++ __ zero_words($base$$Register, (uint64_t)$cnt$$constant); 
++ %} ++ ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct string_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct array_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ++ ins_encode %{ ++ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp$$Register, 1); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct array_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ iRegP_R13 tmp1, iRegP_R14 tmp2, iRegP_R15 tmp3, ++ iRegP_R16 tmp4, iRegP_R28 tmp, rFlagsReg cr) ++%{ ++ predicate(!UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ++ ins_encode %{ ++ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, ++ $result$$Register, $tmp$$Register, 2); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++// ============================================================================ ++// Safepoint Instructions ++ ++instruct safePoint(iRegP poll) ++%{ ++ match(SafePoint poll); ++ ++ ins_cost(2 * LOAD_COST); ++ format %{ ++ "lwu zr, [$poll]\t# Safepoint: poll for GC, #@safePoint" ++ %} ++ ins_encode %{ ++ __ read_polling_page(as_Register($poll$$reg), 0, relocInfo::poll_type); ++ %} ++ ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem); ++%} ++ ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. 
++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(javaThread_RegP dst) ++%{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ ++ format %{ " -- \t// $dst=Thread::current(), empty, #@tlsLoadP" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// inlined locking and unlocking ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST * 3 + ALU_COST * 6 + BRANCH_COST * 3); ++ format %{ "fastlock $object,$box\t! kills $tmp,$tmp2, #@cmpFastLock" %} ++ ++ ins_encode(riscv_enc_fast_lock(object, box, tmp, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// using t1 as the 'flag' register to bridge the BoolNode producers and consumers ++instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) ++%{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP tmp2); ++ ++ ins_cost(LOAD_COST * 2 + STORE_COST + ALU_COST * 2 + BRANCH_COST * 4); ++ format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2, #@cmpFastUnlock" %} ++ ++ ins_encode(riscv_enc_fast_unlock(object, box, tmp, tmp2)); ++ ++ ins_pipe(pipe_serial); ++%} ++ ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $method_oop holds method oop, #@TailCalljmpInd." %} ++ ++ ins_encode(riscv_enc_tail_call(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R10 ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); ++ ++ ins_cost(ALU_COST + BRANCH_COST); ++ ++ format %{ "jalr $jump_target\t# $ex_oop holds exception oop, #@TailjmpInd." %} ++ ++ ins_encode(riscv_enc_tail_jmp(jump_target)); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. ++instruct CreateException(iRegP_R10 ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ ins_cost(0); ++ format %{ " -- \t// exception oop; no code emitted, #@CreateException" %} ++ ++ size(0); ++ ++ ins_encode( /*empty*/ ); ++ ++ ins_pipe(pipe_class_empty); ++%} ++ ++// Rethrow exception: The exception oop will come in the first ++// argument position. Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "j rethrow_stub\t#@RethrowException" %} ++ ++ ins_encode( riscv_enc_rethrow() ); ++ ++ ins_pipe(pipe_class_call); ++%} ++ ++// Return Instruction ++// epilog node loads ret address into ra as part of frame pop ++instruct Ret() ++%{ ++ match(Return); ++ ++ ins_cost(BRANCH_COST); ++ format %{ "ret\t// return register, #@Ret" %} ++ ++ ins_encode(riscv_enc_ret()); ++ ++ ins_pipe(pipe_branch); ++%} ++ ++// Die now. 
++instruct ShouldNotReachHere() %{ ++ match(Halt); ++ ++ ins_cost(BRANCH_COST); ++ ++ format %{ "#@ShouldNotReachHere" %} ++ ++ ins_encode %{ ++ if (is_reachable()) { ++ __ halt(); ++ } ++ %} ++ ++ ins_pipe(pipe_class_default); ++%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. ++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ ++// Local Variables: ++// mode: c++ ++// End: +diff --git a/src/hotspot/cpu/riscv/riscv_b.ad b/src/hotspot/cpu/riscv/riscv_b.ad +new file mode 100644 +index 000000000..6f7055a39 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_b.ad +@@ -0,0 +1,605 @@ ++// ++// Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV Bit-Manipulation Extension Architecture Description File ++ ++instruct rorI_imm_b(iRegINoSp dst, iRegI src, immI rshift, immI lshift) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 32)); ++ effect(DEF dst, USE src); ++ ++ format %{ "roriw $dst, $src, ($rshift & 0x1f)\t#@rorI_imm_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ roriw(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x1f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++instruct rorL_imm_b(iRegLNoSp dst, iRegL src, immI rshift, immI lshift) %{ ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ predicate(UseZbb && ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) == 64)); ++ effect(DEF dst, USE src); ++ ++ format %{ "rori $dst, $src, ($rshift & 0x3f)\t#@rorL_imm_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ rori(as_Register($dst$$reg), as_Register($src$$reg), $rshift$$constant & 0x3f); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// ror expander ++instruct rorI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ effect(DEF dst, USE src, USE shift); ++ ++ format %{ "rorw $dst, $src, $shift\t#@rorI_reg_b" %} ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ rorw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// ror expander ++instruct rorL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ effect(DEF dst, USE src, USE shift); ++ ++ format %{ "ror $dst, $src, $shift\t#@rorL_reg_b" %} ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ ror(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++ ++instruct rorI_rReg_Var_C_32_b(iRegINoSp dst, iRegI src, iRegI shift, immI_32 imm32) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI imm32 shift)))); ++ ++ expand %{ ++ rorI_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rorI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI zero shift)))); ++ ++ expand %{ ++ rorI_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rorL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI imm64 shift)))); ++ ++ expand %{ ++ rorL_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rorL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI zero shift)))); ++ ++ expand %{ ++ rorL_reg_b(dst, src, shift); ++ %} ++%} ++ ++// rol expander ++instruct rolI_reg_b(iRegINoSp dst, iRegI src, iRegI shift) %{ ++ effect(DEF dst, USE src, USE shift); ++ ++ format %{ "rolw $dst, $src, $shift\t#@rolI_reg_b" %} ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ rolw(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// rol expander ++instruct rolL_reg_b(iRegLNoSp dst, iRegL src, iRegI shift) %{ ++ effect(DEF dst, USE src, USE shift); ++ ++ format %{ "rol $dst, $src, $shift\t#@rolL_reg_b" %} ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ rol(as_Register($dst$$reg), as_Register($src$$reg), as_Register($shift$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct rolI_rReg_Var_C_32_b(iRegINoSp dst, iRegI 
src, iRegI shift, immI_32 imm32) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI imm32 shift)))); ++ ++ expand %{ ++ rolI_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rolI_rReg_Var_C0_b(iRegINoSp dst, iRegI src, iRegI shift, immI0 zero) %{ ++ predicate(UseZbb); ++ match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI zero shift)))); ++ ++ expand %{ ++ rolI_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rolL_rReg_Var_C_64_b(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 imm64) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI imm64 shift)))); ++ ++ expand %{ ++ rolL_reg_b(dst, src, shift); ++ %} ++%} ++ ++instruct rolL_rReg_Var_C0_b(iRegLNoSp dst, iRegL src, iRegI shift, immI0 zero) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI zero shift)))); ++ ++ expand %{ ++ rolL_reg_b(dst, src, shift); ++ %} ++%} ++ ++// Convert oop into int for vectors alignment masking ++instruct convP2I_b(iRegINoSp dst, iRegP src) %{ ++ predicate(UseZba); ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "zext.w $dst, $src\t# ptr -> int @convP2I_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// byte to int ++instruct convB2I_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_24 lshift, immI_24 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.b $dst, $src\t# b2i, #@convB2I_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ sext_b(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// int to short ++instruct convI2S_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16 lshift, immI_16 rshift) %{ ++ predicate(UseZbb); ++ match(Set dst (RShiftI (LShiftI src lshift) rshift)); ++ ++ format %{ "sext.h $dst, $src\t# i2s, #@convI2S_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ sext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// short to unsigned int ++instruct convS2UI_reg_reg_b(iRegINoSp dst, iRegIorL2I src, immI_16bits mask) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src mask)); ++ ++ format %{ "zext.h $dst, $src\t# s2ui, #@convS2UI_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ zext_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// int to unsigned long (zero extend) ++instruct convI2UL_reg_reg_b(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask) %{ ++ predicate(UseZba); ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "zext.w $dst, $src\t# i2ul, #@convI2UL_reg_reg_b" %} ++ ++ ins_cost(ALU_COST); ++ ins_encode %{ ++ __ zext_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_shift); ++%} ++ ++// BSWAP instructions ++instruct bytes_reverse_int_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_w_w $dst, $src\t#@bytes_reverse_int_b" %} ++ ++ ins_encode %{ ++ __ revb_w_w(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_long_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "rev8 $dst, $src\t#@bytes_reverse_long_b" %} ++ ++ ins_encode %{ ++ __ rev8(as_Register($dst$$reg), 
as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_unsigned_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesUS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h_u $dst, $src\t#@bytes_reverse_unsigned_short_b" %} ++ ++ ins_encode %{ ++ __ revb_h_h_u(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct bytes_reverse_short_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (ReverseBytesS src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ "revb_h_h $dst, $src\t#@bytes_reverse_short_b" %} ++ ++ ins_encode %{ ++ __ revb_h_h(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Shift Add Pointer ++instruct shaddP_reg_reg_b(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct shaddP_reg_reg_ext_b(iRegPNoSp dst, iRegP src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddP src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t# ptr, #@shaddP_reg_reg_ext_b" %} ++ ++ ins_encode %{ ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Shift Add Long ++instruct shaddL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL src2 imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct shaddL_reg_reg_ext_b(iRegLNoSp dst, iRegL src1, iRegI src2, immIScale imm) %{ ++ predicate(UseZba); ++ match(Set dst (AddL src1 (LShiftL (ConvI2L src2) imm))); ++ ++ ins_cost(ALU_COST); ++ format %{ "shadd $dst, $src2, $src1, $imm\t#@shaddL_reg_reg_ext_b" %} ++ ++ ins_encode %{ ++ __ shadd(as_Register($dst$$reg), ++ as_Register($src2$$reg), ++ as_Register($src1$$reg), ++ t0, ++ $imm$$constant); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Zeros Count instructions ++instruct countLeadingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clzw $dst, $src\t#@countLeadingZerosI_b" %} ++ ++ ins_encode %{ ++ __ clzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct countLeadingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "clz $dst, $src\t#@countLeadingZerosL_b" %} ++ ++ ins_encode %{ ++ __ clz(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct countTrailingZerosI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctzw $dst, $src\t#@countTrailingZerosI_b" %} ++ ++ ins_encode %{ ++ __ 
ctzw(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++instruct countTrailingZerosL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "ctz $dst, $src\t#@countTrailingZerosL_b" %} ++ ++ ins_encode %{ ++ __ ctz(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Population Count instructions ++instruct popCountI_b(iRegINoSp dst, iRegIorL2I src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpopw $dst, $src\t#@popCountI_b" %} ++ ++ ins_encode %{ ++ __ cpopw(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Note: Long/bitCount(long) returns an int. ++instruct popCountL_b(iRegINoSp dst, iRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ ins_cost(ALU_COST); ++ format %{ "cpop $dst, $src\t#@popCountL_b" %} ++ ++ ins_encode %{ ++ __ cpop(as_Register($dst$$reg), as_Register($src$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg); ++%} ++ ++// Max and Min ++instruct minI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MinI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "min $dst, $src1, $src2\t#@minI_reg_b" %} ++ ++ ins_encode %{ ++ __ min(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct maxI_reg_b(iRegINoSp dst, iRegI src1, iRegI src2) %{ ++ predicate(UseZbb); ++ match(Set dst (MaxI src1 src2)); ++ ++ ins_cost(ALU_COST); ++ format %{ "max $dst, $src1, $src2\t#@maxI_reg_b" %} ++ ++ ins_encode %{ ++ __ max(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Abs ++instruct absI_reg_b(iRegINoSp dst, iRegI src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsI src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "negw t0, $src\n\t" ++ "max $dst, $src, t0\t#@absI_reg_b" ++ %} ++ ++ ins_encode %{ ++ __ negw(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct absL_reg_b(iRegLNoSp dst, iRegL src) %{ ++ predicate(UseZbb); ++ match(Set dst (AbsL src)); ++ ++ ins_cost(ALU_COST * 2); ++ format %{ ++ "neg t0, $src\n\t" ++ "max $dst, $src, t0\t#@absL_reg_b" ++ %} ++ ++ ins_encode %{ ++ __ neg(t0, as_Register($src$$reg)); ++ __ max(as_Register($dst$$reg), as_Register($src$$reg), t0); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// And Not ++instruct andnI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnI_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct andnL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (AndL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "andn $dst, $src1, $src2\t#@andnL_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ andn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++// Or Not ++instruct ornI_reg_reg_b(iRegINoSp dst, iRegI src1, iRegI src2, immI_M1 m1) %{ ++ predicate(UseZbb); 
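++  // Or-not idiom: src1 | (src2 ^ -1) is src1 | ~src2, folded into a single Zbb orn.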
++ match(Set dst (OrI src1 (XorI src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornI_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct ornL_reg_reg_b(iRegLNoSp dst, iRegL src1, iRegL src2, immL_M1 m1) %{ ++ predicate(UseZbb); ++ match(Set dst (OrL src1 (XorL src2 m1))); ++ ++ ins_cost(ALU_COST); ++ format %{ "orn $dst, $src1, $src2\t#@ornL_reg_reg_b" %} ++ ++ ins_encode %{ ++ __ orn(as_Register($dst$$reg), ++ as_Register($src1$$reg), ++ as_Register($src2$$reg)); ++ %} ++ ++ ins_pipe(ialu_reg_reg); ++%} +diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad +new file mode 100644 +index 000000000..905041890 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/riscv_v.ad +@@ -0,0 +1,1723 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. 
++// ++// ++ ++// RISCV VEC Architecture Description File ++ ++opclass vmemA(indirect); ++ ++source_hpp %{ ++ bool op_vec_supported(int opcode); ++%} ++ ++source %{ ++ ++ static inline BasicType vector_element_basic_type(const MachNode* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { ++ int def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ const TypeVect* vt = def->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ static void loadStore(MacroAssembler masm, bool is_store, ++ VectorRegister reg, BasicType bt, Register base) { ++ Assembler::SEW sew = Assembler::elemtype_to_sew(bt); ++ masm.vsetvli(t0, x0, sew); ++ if (is_store) { ++ masm.vsex_v(reg, base, sew); ++ } else { ++ masm.vlex_v(reg, base, sew); ++ } ++ } ++ ++ bool op_vec_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: ++ case Op_ExtractI: ++ case Op_ExtractL: ++ case Op_ExtractS: ++ case Op_ExtractUB: ++ return false; ++ default: ++ return UseRVV; ++ } ++ } ++ ++%} ++ ++definitions %{ ++ int_def VEC_COST (200, 200); ++%} ++ ++// All VEC instructions ++ ++// vector load/store ++instruct loadV(vReg dst, vmemA mem) %{ ++ match(Set dst (LoadVector mem)); ++ ins_cost(VEC_COST); ++ format %{ "vle $dst, $mem\t#@loadV" %} ++ ins_encode %{ ++ VectorRegister dst_reg = as_VectorRegister($dst$$reg); ++ loadStore(MacroAssembler(&cbuf), false, dst_reg, ++ vector_element_basic_type(this), as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct storeV(vReg src, vmemA mem) %{ ++ match(Set mem (StoreVector mem src)); ++ ins_cost(VEC_COST); ++ format %{ "vse $src, $mem\t#@storeV" %} ++ ins_encode %{ ++ VectorRegister src_reg = as_VectorRegister($src$$reg); ++ loadStore(MacroAssembler(&cbuf), true, src_reg, ++ vector_element_basic_type(this, $src), as_Register($mem$$base)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector abs ++ ++instruct vabsB(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVB src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vabsS(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVS src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vabsI(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVI src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ 
vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vabsL(vReg dst, vReg src, vReg tmp) %{ ++ match(Set dst (AbsVL src)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t" ++ "vmax.vv $dst, $tmp, $src" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg)); ++ __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vabsF(vReg dst, vReg src) %{ ++ match(Set dst (AbsVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vabsD(vReg dst, vReg src) %{ ++ match(Set dst (AbsVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector add ++ ++instruct vaddB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AddVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ 
vfadd_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector and ++ ++instruct vand(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (AndV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vand.vv $dst, $src1, $src2\t#@vand" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vand_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector or ++ ++instruct vor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (OrV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vor.vv $dst, $src1, $src2\t#@vor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector xor ++ ++instruct vxor(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (XorV src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vxor.vv $dst, $src1, $src2\t#@vxor" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vxor_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector float div ++ ++instruct vdivF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vdivD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (DivVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfdiv.vv $dst, $src1, $src2\t#@vdivD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfdiv_vv(as_VectorRegister($dst$$reg), ++ as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fmla ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fmls ++ ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ 
vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fnmla ++ ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ++ ins_cost(VEC_COST); ++ format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fnmls ++ ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector mla ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{ ++ 
match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vmacc.vv $dst_src1, src2, src3\t#@vmlaL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmacc_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector mls ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{ ++ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); ++ ins_cost(VEC_COST); ++ format %{ "vnmsac.vv $dst_src1, src2, src3\t#@vmlsL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vnmsac_vv(as_VectorRegister($dst_src1$$reg), ++ as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector mul ++ ++instruct vmulB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVS src1 src2)); ++ ins_cost(VEC_COST); ++ 
format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (MulVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fneg ++ ++instruct vnegF(vReg dst, vReg src) %{ ++ match(Set dst (NegVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vnegD(vReg dst, vReg src) %{ ++ match(Set dst (NegVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// popcount vector ++ ++instruct vpopcountI(iRegINoSp dst, vReg src) %{ ++ match(Set dst (PopCountVI src)); ++ format %{ "vpopc.m $dst, $src\t#@vpopcountI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector add reduction ++ ++instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{ ++ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (AddReductionVL src1 src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t" ++ "vredsum.vs $tmp, $src2, $tmp\n\t" ++ "vmv.x.s $dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register); ++ __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVF src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{ ++ match(Set src1_dst (AddReductionVD src1_dst src2)); ++ effect(TEMP tmp); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t" ++ "vfredosum.vs $tmp, $src2, $tmp\n\t" ++ "vfmv.f.s $src1_dst, $tmp" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister); ++ __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), ++ as_VectorRegister($tmp$$reg)); ++ __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector replicate ++ ++instruct replicateB(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateB src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ 
vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateS(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateS src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateI(vReg dst, iRegIorL2I src) %{ ++ match(Set dst (ReplicateI src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateL(vReg dst, iRegL src) %{ ++ match(Set dst (ReplicateL src)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.x $dst, $src\t#@replicateL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateB_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateB con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateB_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateS_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateS con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateS_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateI_imm5(vReg dst, immI5 con) %{ ++ match(Set dst (ReplicateI con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateI_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateL_imm5(vReg dst, immL5 con) %{ ++ match(Set dst (ReplicateL con)); ++ ins_cost(VEC_COST); ++ format %{ "vmv.v.i $dst, $con\t#@replicateL_imm5" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateF(vReg dst, fRegF src) %{ ++ match(Set dst (ReplicateF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateD(vReg dst, fRegD src) %{ ++ match(Set dst (ReplicateD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfmv.v.f $dst, $src\t#@replicateD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector shift ++ ++instruct vasrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift 7\t#@vasrB\n\t" ++ "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ 
vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerByte - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t" ++ "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsra.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ BitsPerShort - 1, Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (RShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect( TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ 
ins_pipe(pipe_slow); ++%} ++ ++instruct vlslI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (LShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vv $dst, $src, $shift\t# vector (D)" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrB(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVB src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ // if shift > BitsPerByte - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrS(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVS src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP_DEF dst); ++ format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t" ++ "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t" ++ "vmnot.m v0, v0\n\t" ++ "vsll.vv $dst, $src, $shift, Assembler::v0_t" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ // if shift > BitsPerShort - 1, clear the element ++ __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1); ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg), Assembler::v0_t); ++ // otherwise, shift ++ __ vmnot_m(v0, v0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg), Assembler::v0_t); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++instruct vlsrI(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++instruct vlsrL(vReg dst, vReg src, vReg shift) %{ ++ match(Set dst (URShiftVL src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVB src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) con = BitsPerByte - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVS src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerShort) con = BitsPerShort - 1; ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (RShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ ++ predicate((n->in(2)->get_int() & 0x3f) < 64); ++ match(Set dst (RShiftVL src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vasrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con < 32) { ++ __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ } else { ++ __ li(t0, con); ++ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); ++ __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVB src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVS src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), 
as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (URShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ ++ predicate((n->in(2)->get_int() & 0x3f) < 64); ++ match(Set dst (URShiftVL src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con == 0) { ++ __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ if (con < 32) { ++ __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ } else { ++ __ li(t0, con); ++ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); ++ __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVB src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e8); ++ if (con >= BitsPerByte) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVS src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e16); ++ if (con >= BitsPerShort) { ++ __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), ++ as_VectorRegister($src$$reg)); ++ return; ++ } ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ ++ match(Set dst (LShiftVI src shift)); ++ ins_cost(VEC_COST); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x1f; ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslL_imm(vReg dst, vReg src, immI shift, vReg tmp) %{ ++ predicate((n->in(2)->get_int() & 0x3f) < 64); ++ match(Set dst (LShiftVL src shift)); ++ ins_cost(VEC_COST); ++ effect(TEMP tmp); ++ format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %} ++ ins_encode %{ ++ uint32_t con = (unsigned)$shift$$constant & 0x3f; ++ __ vsetvli(t0, x0, Assembler::e64); ++ if (con < 32) { ++ __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con); ++ } else { ++ __ li(t0, con); ++ __ vmv_v_x(as_VectorRegister($tmp$$reg), t0); ++ __ 
vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($tmp$$reg)); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || ++ n->bottom_type()->is_vect()->element_basic_type() == T_CHAR); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ ++ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector sqrt ++ ++instruct vsqrtF(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVF src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsqrtD(vReg dst, vReg src) %{ ++ match(Set dst (SqrtVD src)); ++ ins_cost(VEC_COST); ++ format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector sub ++ ++instruct vsubB(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVB src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e8); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubS(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVS src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e16); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubI(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVI src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vsub_vv(as_VectorRegister($dst$$reg), 
as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubL(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVL src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubF(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVF src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t@vsubF" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e32); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubD(vReg dst, vReg src1, vReg src2) %{ ++ match(Set dst (SubVD src1 src2)); ++ ins_cost(VEC_COST); ++ format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %} ++ ins_encode %{ ++ __ vsetvli(t0, x0, Assembler::e64); ++ __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg), ++ as_VectorRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt, ++ iRegI_R10 result, vReg_V1 v1, ++ vReg_V2 v2, vReg_V3 v3, rFlagsReg r6) ++%{ ++ predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3); ++ ++ format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
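++    // Trailing argument is the element size in bytes: 2 for this UU (UTF-16) case, 1 for the LL case above.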
++ __ string_equals_v($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsB // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 1); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, rFlagsReg r6) ++%{ ++ predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(DEF result, KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6); ++ ++ format %{ "Array Equals $ary1, ary2 -> $result\t#@array_equalsC // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals_v($ary1$$Register, $ary2$$Register, ++ $result$$Register, $tmp$$Register, 2); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
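++    // The vstring_compare variants below differ only in the StrIntrinsicNode encoding (UU/LL/UL/LU) passed to the helper.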
++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ ++ format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2, ++ iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, ++ iRegP_R28 tmp1, iRegL_R29 tmp2) ++%{ ++ predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result(StrComp(Binary str1 cnt1)(Binary str2 cnt2))); ++ effect(DEF result, KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5); ++ ++ format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %} ++ ins_encode %{ ++ __ string_compare_v($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++// fast byte[] to char[] inflation ++instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len); ++ ++ format %{ "String Inflate $src,$dst" %} ++ ins_encode %{ ++ address tpc = __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ins_pipe(pipe_class_memory); ++%} ++ ++// encode char[] to byte[] in ISO_8859_1 ++instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ 
vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ ++ format %{ "Encode array $src,$dst,$len -> $result" %} ++ ins_encode %{ ++ __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_class_memory ); ++%} ++ ++// fast char[] to byte[] compression ++instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result, ++ vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len, ++ TEMP v1, TEMP v2, TEMP v3, TEMP tmp); ++ ++ format %{ "String Compress $src,$dst -> $result // KILL R11, R12, R13" %} ++ ins_encode %{ ++ __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $tmp$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL tmp) ++%{ ++ predicate(UseRVV); ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len, TEMP tmp); ++ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++ ins_encode %{ ++ address tpc = __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register); ++ if (tpc == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// clearing of an array ++instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy, ++ vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3) ++%{ ++ predicate(UseRVV); ++ match(Set dummy (ClearArray cnt base)); ++ effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3); ++ ++ format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %} ++ ++ ins_encode %{ ++ __ clear_array_v($base$$Register, $cnt$$Register); ++ %} ++ ++ ins_pipe(pipe_class_memory); ++%} +diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +new file mode 100644 +index 000000000..9922ff4cf +--- /dev/null ++++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +@@ -0,0 +1,2738 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/align.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "adfiles/ad_riscv.hpp" ++#include "opto/runtime.hpp" ++#endif ++ ++#define __ masm-> ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class SimpleRuntimeFrame { ++public: ++ ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that fp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++ // we don't expect any arg reg save area so riscv asserts that ++ // frame::arg_reg_save_area_bytes == 0 ++ fp_off = 0, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; ++ ++class RegisterSaver { ++ const bool _save_vectors; ++ public: ++ RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {} ++ ~RegisterSaver() {} ++ OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); ++ void restore_live_registers(MacroAssembler* masm); ++ ++ // Offsets into the register save area ++ // Used by deoptimization when it is managing result register ++ // values on its own ++ // gregs:28, float_register:32; except: x1(ra) & x2(sp) & gp(x3) & tp(x4) ++ // |---v0---|<---SP ++ // |---v1---|save vectors only in generate_handler_blob ++ // |-- .. --| ++ // |---v31--|----- ++ // |---f0---| ++ // |---f1---| ++ // | .. | ++ // |---f31--| ++ // |---reserved slot for stack alignment---| ++ // |---x5---| ++ // | x6 | ++ // |---.. 
--| ++ // |---x31--| ++ // |---fp---| ++ // |---ra---| ++ int v0_offset_in_bytes(void) { return 0; } ++ int f0_offset_in_bytes(void) { ++ int f0_offset = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers * ++ BytesPerInt; ++ } ++#endif ++ return f0_offset; ++ } ++ int reserved_slot_offset_in_bytes(void) { ++ return f0_offset_in_bytes() + ++ FloatRegisterImpl::max_slots_per_register * ++ FloatRegisterImpl::number_of_registers * ++ BytesPerInt; ++ } ++ ++ int reg_offset_in_bytes(Register r) { ++ assert(r->encoding() > 4, "ra, sp, gp and tp not saved"); ++ return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize; ++ } ++ ++ int freg_offset_in_bytes(FloatRegister f) { ++ return f0_offset_in_bytes() + f->encoding() * wordSize; ++ } ++ ++ int ra_offset_in_bytes(void) { ++ return reserved_slot_offset_in_bytes() + ++ (RegisterImpl::number_of_registers - 3) * ++ RegisterImpl::max_slots_per_register * ++ BytesPerInt; ++ } ++ ++ // During deoptimization only the result registers need to be restored, ++ // all the other values have already been extracted. ++ void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { ++ int vector_size_in_bytes = 0; ++ int vector_size_in_slots = 0; ++#ifdef COMPILER2 ++ if (_save_vectors) { ++ vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE); ++ vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT); ++ } ++#endif ++ ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // Save Integer, Float and Vector registers. ++ __ enter(); ++ __ push_CPU_state(_save_vectors, vector_size_in_bytes); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
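++  // Rough arithmetic check of the save-area layout above (illustrative only,
++  // assuming no RVV save, 32 integer/float registers, 2 stack slots per
++  // register, BytesPerInt == 4 and wordSize == 8):
++  //   f0_offset_in_bytes()            == 0
++  //   reserved_slot_offset_in_bytes() == 2 * 32 * 4             == 256
++  //   reg_offset_in_bytes(x10)        == 256 + (10 - 4) * 8     == 304
++  //   ra_offset_in_bytes()            == 256 + (32 - 3) * 2 * 4 == 488
++  // so with additional_frame_words == 0, frame_size_in_bytes comes out as
++  // align_up(488 + 8, 16) == 496.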
++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ assert_cond(oop_maps != NULL && oop_map != NULL); ++ ++ int sp_offset_in_slots = 0; ++ int step_in_slots = 0; ++ if (_save_vectors) { ++ step_in_slots = vector_size_in_slots; ++ for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ VectorRegister r = as_VectorRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ } ++ } ++ ++ step_in_slots = FloatRegisterImpl::max_slots_per_register; ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ FloatRegister r = as_FloatRegister(i); ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg()); ++ } ++ ++ step_in_slots = RegisterImpl::max_slots_per_register; ++ // skip the slot reserved for alignment, see MacroAssembler::push_reg; ++ // also skip x5 ~ x6 on the stack because they are caller-saved registers. ++ sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3; ++ // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack. ++ for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) { ++ Register r = as_Register(i); ++ if (r != xthread) { ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg()); ++ } ++ } ++ ++ return oop_map; ++} ++ ++void RegisterSaver::restore_live_registers(MacroAssembler* masm) { ++#ifdef COMPILER2 ++ __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE)); ++#else ++ __ pop_CPU_state(_save_vectors); ++#endif ++ __ leave(); ++} ++ ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restored to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ // Restore fp result register ++ __ fld(f10, Address(sp, freg_offset_in_bytes(f10))); ++ // Restore integer result register ++ __ ld(x10, Address(sp, reg_offset_in_bytes(x10))); ++ ++ // Pop all of the register save are off the stack ++ __ add(sp, sp, align_up(ra_offset_in_bytes(), 16)); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// riscv does not ovlerlay the floating-point registers on vector registers like aarch64. ++bool SharedRuntime::is_wide_vector(int size) { ++ return UseRVV; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ // Byte size of function generate_trampoline. movptr_with_offset: 5 instructions, jalr: 1 instrction ++ return 6 * NativeInstruction::instruction_size; // lui + addi + slli + addi + slli + jalr ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, destination, offset); // lui + addi + slli + addi + slli ++ __ jalr(x0, t0, offset); ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. 
All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, ++// which are 64-bit. The OUTPUTS are in 32-bit units. ++ ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni ++// methods with small numbers of arguments without having to shuffle ++// the arguments at all. Since we control the java ABI we ought to at ++// least get some advantage out of it. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ assert_cond(sig_bt != NULL && regs != NULL); ++ // Create the mapping between argument positions and ++ // registers. ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, ++ j_rarg4, j_rarg5, j_rarg6, j_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, j_farg6, j_farg7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: ++ if (int_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. 
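++// The helper below passes the Method* in c_rarg0 and the return address in
++// c_rarg1 to SharedRuntime::fixup_callers_callsite, which (when it is safe to
++// do so) re-patches the compiled caller's call site so future calls go
++// straight to the callee's compiled entry instead of through the c2i stub.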
++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, L); ++ ++ __ enter(); ++ __ push_CPU_state(); ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach ++ ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ ++ __ mv(c_rarg0, xmethod); ++ __ mv(c_rarg1, ra); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset); ++ __ jalr(x1, t0, offset); ++ __ pop_CPU_state(); ++ // restore sp ++ __ leave(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); ++ ++ __ bind(skip_fixup); ++ ++ int words_pushed = 0; ++ ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. ++ ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ __ mv(x30, sp); ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2 * wordSize); ++ ++ if (extraspace) { ++ __ sub(sp, sp, extraspace); ++ } ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; ++ ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a Java long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. 
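++    // Concrete example using only the st_off/next_off formulas above: for a
++    // (long, boolean) signature, total_args_passed == 3 (T_LONG, T_VOID,
++    // T_BOOLEAN) and Interpreter::stackElementSize == 8, so
++    //   i == 0 (T_LONG):    st_off == 16, next_off == 8 -> value stored at sp+8,
++    //                       sp+16 is the unused T_VOID slot (junk in debug builds)
++    //   i == 1 (T_VOID):    skipped
++    //   i == 2 (T_BOOLEAN): st_off == 0                 -> value stored at sp+0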
++ ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use t0 ++ int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size ++ + extraspace ++ + words_pushed * wordSize); ++ if (!r_2->is_valid()) { ++ __ lwu(t0, Address(sp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } else { ++ __ ld(t0, Address(sp, ld_off), /*temp register*/esp); ++ ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ sd(t0, Address(sp, next_off), /*temp register*/esp); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 0xdeadffffdeadaaaaul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ } else { ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ __ sd(r, Address(sp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // jlong/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 0xdeadffffdeadaaabul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ sd(r, Address(sp, next_off)); ++ } else { ++ __ sd(r, Address(sp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ fsw(r_1->as_FloatRegister(), Address(sp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mv(t0, 0xdeadffffdeadaaacul); ++ __ sd(t0, Address(sp, st_off), /*temp register*/esp); ++#endif /* ASSERT */ ++ __ fsd(r_1->as_FloatRegister(), Address(sp, next_off)); ++ } ++ } ++ } ++ ++ __ mv(esp, sp); // Interp expects args on caller's expression stack ++ ++ __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset()))); ++ __ jr(t0); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ // Cut-out for having no stack args. ++ int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord; ++ if (comp_args_on_stack != 0) { ++ __ sub(t0, sp, comp_words_on_stack * wordSize); ++ __ andi(sp, t0, -16); ++ } ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset()))); ++ ++ // Now generate the shuffle code. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. 
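++    // This is the mirror image of gen_c2i_adapter: ld_off/next_off index the
++    // interpreter's expression stack (esp), and for T_LONG/T_DOUBLE the value
++    // is picked up from next_off, the slot the c2i path actually wrote to.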
++ int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to account for return address ) ++ int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size; ++ if (!r_2->is_valid()) { ++ __ lw(t0, Address(esp, ld_off)); ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ __ ld(t0, Address(esp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ sd(t0, Address(sp, st_off), /*temp register*/t2); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, ++ // T_ADDRESS, T_LONG, or T_DOUBLE the interpreter allocates ++ // two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the ++ // interpreter. ++ ++ const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? ++ next_off : ld_off; ++ ++ // this can be a misaligned move ++ __ ld(r, Address(esp, offset)); ++ } else { ++ // sign extend and use a full word? ++ __ lw(r, Address(esp, ld_off)); ++ } ++ } else { ++ if (!r_2->is_valid()) { ++ __ flw(r_1->as_FloatRegister(), Address(esp, ld_off)); ++ } else { ++ __ fld(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. 
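++  // (JavaThread::_callee_target is read back by the handle_wrong_method
++  // stubs, so the store below is enough for the VM to recover the callee.)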
++ ++ __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset())); ++ ++ __ jr(t1); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ ++ Label ok; ++ ++ const Register holder = t1; ++ const Register receiver = j_rarg0; ++ const Register tmp = t2; // A call-clobbered register not used for arg passing ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not FP, get sick). ++ ++ { ++ __ block_comment("c2i_unverified_entry {"); ++ __ load_klass(t0, receiver); ++ __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); ++ __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ beq(t0, tmp, ok); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected. ++ __ ld(t0, Address(xmethod, in_bytes(Method::code_offset()))); ++ __ beqz(t0, skip_fixup); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ __ block_comment("} c2i_unverified_entry"); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on riscv"); ++ assert_cond(sig_bt != NULL && regs != NULL); ++ ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. 
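++  // Illustrative mapping, using the standard RISC-V C ABI register names
++  // (c_rarg0..c_rarg7 == x10..x17, c_farg0..c_farg7 == f10..f17): for a native
++  // signature of (JNIEnv*, jobject, jlong, jfloat) the sig_bt array is
++  // { T_ADDRESS, T_OBJECT, T_LONG, T_VOID, T_FLOAT } and this routine hands out
++  //   T_ADDRESS -> x10, T_OBJECT -> x11, T_LONG -> x12, T_VOID -> (bad),
++  //   T_FLOAT   -> f10
++  // and returns 0 because nothing spilled to the stack. Note that, unlike the
++  // Java convention above, a float argument falls back to the next free
++  // integer register before it ever goes to the stack.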
++ ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, ++ c_rarg4, c_rarg5, c_rarg6, c_rarg7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5, c_farg6, c_farg7 ++ }; ++ ++ uint int_args = 0; ++ uint fp_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: // fall through ++ case T_CHAR: // fall through ++ case T_BYTE: // fall through ++ case T_SHORT: // fall through ++ case T_INT: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: // fall through ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ case T_OBJECT: // fall through ++ case T_ARRAY: // fall through ++ case T_ADDRESS: // fall through ++ case T_METADATA: ++ if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (fp_args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); ++ } else if (int_args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++ return stk_args; ++} ++ ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ fsw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fsd(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ sd(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ flw(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_DOUBLE: ++ __ fld(f10, Address(fp, -3 * wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ld(x10, Address(fp, -3 * wordSize)); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + 
args[i].first()->as_Register(); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ addi(sp, sp, -2 * wordSize); ++ __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ } ++ } ++ __ push_reg(x, sp); ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ RegSet x; ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ x = x + args[i].first()->as_Register(); ++ } else { ++ ; ++ } ++ } ++ __ pop_reg(x, sp); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ ; ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0)); ++ __ add(sp, sp, 2 * wordSize); ++ } ++ } ++} ++ ++// Check GCLocker::needs_gc and enter the runtime if it's true. This ++// keeps a new JNI critical region from starting until a GC has been ++// forced. Save down any oops in registers and describe them in an ++// OopMap. ++static void check_needs_gc_for_critical_native(MacroAssembler* masm, ++ int stack_slots, ++ int total_c_args, ++ int total_in_args, ++ int arg_save_area, ++ OopMapSet* oop_maps, ++ VMRegPair* in_regs, ++ BasicType* in_sig_bt) { Unimplemented(); } ++ ++// Unpack an array argument into a pointer to the body and the length ++// if the array is non-null, otherwise pass 0 for both. ++static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } ++ ++class ComputeMoveOrder: public StackObj { ++ class MoveOperation: public ResourceObj { ++ friend class ComputeMoveOrder; ++ private: ++ VMRegPair _src; ++ VMRegPair _dst; ++ int _src_index; ++ int _dst_index; ++ bool _processed; ++ MoveOperation* _next; ++ MoveOperation* _prev; ++ ++ static int get_id(VMRegPair r) { Unimplemented(); return 0; } ++ ++ public: ++ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): ++ _src(src) ++ , _dst(dst) ++ , _src_index(src_index) ++ , _dst_index(dst_index) ++ , _processed(false) ++ , _next(NULL) ++ , _prev(NULL) { Unimplemented(); } ++ ++ ~MoveOperation() { ++ _next = NULL; ++ _prev = NULL; ++ } ++ ++ VMRegPair src() const { Unimplemented(); return _src; } ++ int src_id() const { Unimplemented(); return 0; } ++ int src_index() const { Unimplemented(); return 0; } ++ VMRegPair dst() const { Unimplemented(); return _src; } ++ void set_dst(int i, VMRegPair dst) { Unimplemented(); } ++ int dst_index() const { Unimplemented(); return 0; } ++ int dst_id() const { Unimplemented(); return 0; } ++ MoveOperation* next() const { Unimplemented(); return 0; } ++ MoveOperation* prev() const { Unimplemented(); return 0; } ++ void set_processed() { Unimplemented(); } ++ bool is_processed() const { Unimplemented(); return 0; } ++ ++ // insert ++ void break_cycle(VMRegPair temp_register) { Unimplemented(); } ++ ++ void link(GrowableArray& killer) { Unimplemented(); } ++ }; ++ ++ private: ++ GrowableArray edges; ++ ++ public: ++ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, ++ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } ++ ++ ~ComputeMoveOrder() {} ++ // Collected all the move operations ++ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } ++ ++ // Walk the edges breaking cycles between moves. 
The result list ++ // can be walked in order to produce the proper set of loads ++ GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } ++}; ++ ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ const Register temp_reg = x9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = x9; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = x12; // known to be free at this point ++ __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. 
++// ++// Critical native functions are a shorthand for the use of ++// GetPrimtiveArrayCritical and disallow the use of any other JNI ++// functions. The wrapper is expected to unpack the arguments before ++// passing them to the callee and perform checks before and after the ++// native call to ensure that they GCLocker ++// lock_critical/unlock_critical semantics are followed. Some other ++// parts of JNI setup are skipped like the tear down of the JNI handle ++// block and the check for pending exceptions it's impossible for them ++// to be thrown. ++// ++// They are roughly structured like this: ++// if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical() ++// tranistion to thread_in_native ++// unpack arrray arguments and call native entry point ++// check for safepoint in progress ++// check if any thread suspend flags are set ++// call into JVM and possible unlock the JNI critical ++// if a GC was suppressed while in the critical native. ++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // First instruction must be a nop as it may need to be patched on deoptimisation ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ assert_cond(out_sig_bt != NULL && out_regs != NULL); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ assert_cond(in_elem_bt != NULL); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 8 * VMRegImpl::slots_per_word; // 8 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
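++    // For example, a critical native taking (int, long, byte[]) entirely in
++    // registers needs single_slots == 1 (the T_INT) and double_slots == 2
++    // (the T_LONG and the T_ARRAY, which is a pointer on LP64), giving
++    // total_save_slots == 2 * 2 + 1 == 5 before alignment.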
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: // specific to LP64 (7145024) ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ ShouldNotReachHere(); ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved fp ++ stack_slots += 6; ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // | 2 slots (ra) | ++ // | 2 slots (fp) | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (8 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ // First thing make an ic check to see if we should even be here ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ ++ const Register ic_reg = t1; ++ const Register receiver = j_rarg0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ assert_different_registers(ic_reg, receiver, t0); ++ __ verify_oop(receiver); ++ __ cmp_klass(receiver, ic_reg, t0, hit); ++ ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ // Verified entry point must be aligned ++ __ align(8); ++ ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset(checked_cast(JavaThread::stack_shadow_zone_size())); ++ } else { ++ Unimplemented(); ++ } ++ ++ // Generate a new frame for the wrapper. 
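++  // Rough example of the bookkeeping above, assuming out_preserve_stack_slots()
++  // is 0 and every C argument fits in a register: for a synchronized static
++  // native, stack_slots grows 0 -> 16 (oop handle area, 8 words) -> 18 (klass)
++  // -> 20 (lock box) -> 26 (+2 moves, +4 ra/fp) -> 28 after aligning to
++  // StackAlignmentInSlots, i.e. stack_size == 112 bytes; the enter()/sub(sp)
++  // sequence below then extends the frame by 112 - 2 * wordSize == 96 bytes.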
++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ sub(sp, sp, stack_size - 2 * wordSize); ++ ++ // Frame is now completed as far as size and linkage. ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // We use x18 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = x18; ++ ++ if (is_critical_native) { ++ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmti, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. ++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // ++ ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ assert_cond(map != NULL); ++ ++ int float_args = 0; ++ int int_args = 0; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. 
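++  // For the regular (non-critical) case the loop below simply pairs java arg i
++  // with C arg i plus the 1 or 2 hidden arguments, walking backwards; e.g. with
++  // total_in_args == 2 and total_c_args == 3, arg_order ends up as
++  // [1,2, 0,1], so the last java argument is shuffled first and no outgoing
++  // register is overwritten before its incoming value has been read.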
++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(x9->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("mv %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ mv(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ int_args++; ++ break; ++ } ++ // no break ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ int_args++; ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ __ float_move(in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg + 1] == T_VOID, "bad arg list"); ++ __ double_move(in_regs[i], out_regs[c_arg]); ++ float_args++; ++ break; ++ ++ case T_LONG : ++ __ long_move(in_regs[i], out_regs[c_arg]); ++ int_args++; ++ break; ++ ++ case T_ADDRESS: ++ assert(false, "found T_ADDRESS in java args"); ++ break; ++ ++ default: ++ __ move32_64(in_regs[i], out_regs[c_arg]); ++ int_args++; ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ int c_arg = total_c_args - total_in_args; ++ ++ // Pre-load a static method's oop into c_rarg1. 
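++  // The mirror is not passed as a raw oop: it is stored into the frame at
++  // klass_offset, recorded in the oop map, and c_rarg1 is set to the address
++  // of that slot, i.e. the jclass handle the native code expects.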
++ if (method->is_static() && !is_critical_native) { ++ ++ // load oop into a register ++ __ movoop(c_rarg1, ++ JNIHandles::make_local(method->method_holder()->java_mirror()), ++ /*immediate*/true); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ sd(c_rarg1, Address(sp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ la(c_rarg1, Address(sp, klass_offset)); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a stack traversal). ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ Label native_return; ++ __ set_last_Java_frame(sp, noreg, native_return, t0); ++ ++ Label dtrace_method_entry, dtrace_method_entry_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ addw(t0, t0, zr); ++ __ bnez(t0, dtrace_method_entry); ++ __ bind(dtrace_method_entry_done); ++ } ++ ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // Lock a synchronized method ++ ++ // Register definitions used by locking and unlocking ++ ++ const Register swap_reg = x10; ++ const Register obj_reg = x9; // Will contain the oop ++ const Register lock_reg = x30; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = x30; // value of old header at unlock time ++ const Register tmp = ra; ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ mv(oop_handle_reg, c_rarg1); ++ ++ // Get address of the box ++ ++ __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load (object->mark() | 1) into swap_reg % x10 ++ __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ ori(swap_reg, t0, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ // src -> dest if dest == x10 else x10 <- dest ++ { ++ Label here; ++ __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. 
++ // NOTE: the oopMark is in swap_reg % 10 as the result of cmpxchg ++ ++ __ sub(swap_reg, swap_reg, sp); ++ __ andi(swap_reg, swap_reg, 3 - os::vm_page_size()); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ bnez(swap_reg, slow_path_lock); ++ ++ // Slow path will re-enter here ++ ++ __ bind(lock_done); ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset()))); ++ } ++ ++ // Now set thread in native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ ++ __ rt_call(native_func); ++ ++ __ bind(native_return); ++ ++ intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(return_pc - start, map); ++ ++ // Unpack native results. ++ if(ret_type != T_OBJECT && ret_type != T_ARRAY) { ++ __ cast_primitive_type(ret_type, x10); ++ } ++ ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ mv(t0, _thread_in_native_trans); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); ++ } else { ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(xthread, x12, t0); ++ } ++ } else { ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ Label safepoint_in_progress, safepoint_in_progress_done; ++ { ++ __ safepoint_poll_acquire(safepoint_in_progress); ++ __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ bnez(t0, safepoint_in_progress); ++ __ bind(safepoint_in_progress_done); ++ } ++ ++ // change thread state ++ Label after_transition; ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_Java); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ __ bind(after_transition); ++ ++ Label reguard; ++ Label reguard_done; ++ __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset())); ++ __ mv(t1, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(t0, t1, reguard); ++ __ bind(reguard_done); ++ ++ // native result if any is live ++ ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld(obj_reg, Address(oop_handle_reg, 0)); ++ ++ Label done; ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ beqz(t0, done); ++ ++ // Must save x10 if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ ++ // get address of the stack lock ++ __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ld(old_hdr, Address(x10, 0)); ++ ++ // Atomic swap old header if oop still contains the stack lock ++ Label succeed; ++ __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock); ++ __ bind(succeed); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ } ++ ++ Label dtrace_method_exit, dtrace_method_exit_done; ++ { ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset); ++ __ lbu(t0, Address(t0, offset)); ++ __ bnez(t0, dtrace_method_exit); ++ __ bind(dtrace_method_exit_done); ++ } ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(x10, xthread, t1); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(x12, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes())); ++ } ++ ++ __ leave(); ++ ++ if (!is_critical_native) { ++ // Any exception pending? 
++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ bnez(t0, exception_pending); ++ } ++ ++ // We're done ++ __ ret(); ++ ++ // Unexpected paths are out of line and go here ++ ++ if (!is_critical_native) { ++ // forward the exception ++ __ bind(exception_pending); ++ ++ // and forward the exception ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ } ++ ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ __ block_comment("Slow path lock {"); ++ __ bind(slow_path_lock); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ __ mv(c_rarg0, obj_reg); ++ __ mv(c_rarg1, lock_reg); ++ __ mv(c_rarg2, xthread); ++ ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ j(lock_done); ++ ++ __ block_comment("} Slow path lock"); ++ ++ __ block_comment("Slow path unlock {"); ++ __ bind(slow_path_unlock); ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ mv(c_rarg2, xthread); ++ __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ __ mv(c_rarg0, obj_reg); ++ ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == x9 currently ++ __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ ++ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ j(unlock_done); ++ ++ __ block_comment("} Slow path unlock"); ++ ++ } // synchronized ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ j(reguard_done); ++ ++ // SLOW PATH safepoint ++ { ++ __ block_comment("safepoint {"); ++ __ bind(safepoint_in_progress); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. 
++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ mv(c_rarg0, xthread); ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ int32_t offset = 0; ++ if (!is_critical_native) { ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset); ++ } else { ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)), offset); ++ } ++ __ jalr(x1, t0, offset); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic above. ++ __ j(after_transition); ++ } ++ ++ __ j(safepoint_in_progress_done); ++ __ block_comment("} safepoint"); ++ } ++ ++ // SLOW PATH dtrace support ++ { ++ __ block_comment("dtrace entry {"); ++ __ bind(dtrace_method_entry); ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? ++ ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ xthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ __ j(dtrace_method_entry_done); ++ __ block_comment("} dtrace entry"); ++ } ++ ++ { ++ __ block_comment("dtrace exit {"); ++ __ bind(dtrace_method_exit); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ xthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ j(dtrace_method_exit_done); ++ __ block_comment("} dtrace exit"); ++ } ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ assert(nm != NULL, "create native nmethod fail!"); ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ assert(callee_locals >= callee_parameters, ++ "test and remove; got more parms than locals"); ++ if (callee_locals < callee_parameters) { ++ return 0; // No adjustment for negative locals ++ } ++ int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++ // diff is counted in stack words ++ return align_up(diff, 2); ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++ CodeBuffer buffer("deopt_blob", 2048 + pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words = -1; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(masm != NULL && oop_maps != NULL); ++ RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0); ++ ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_riscv.cpp). ++ // x10: exception oop ++ // x9: exception handler ++ // x13: throwing pc ++ // So in this case we simply jam x13 into the useless return address and ++ // the stack looks just like we want. ++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; ++ ++ // Prolog for non exception case! ++ ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ ++ // Normal deoptimization. Save exec mode for unpack_frames. 
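++  // xcpool is callee-saved, so the chosen unpack kind survives until the
++  // unpack_frames() call: Unpack_deopt here, Unpack_reexecute and
++  // Unpack_exception at the entry points below, Unpack_uncommon_trap in
++  // the uncommon trap blob.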
++ __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved ++ __ j(cont); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ ++ __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved ++ __ j(cont); ++ ++ int exception_offset = __ pc() - start; ++ ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for x10, and ++ // x13 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ ++ int exception_in_tls_offset = __ pc() - start; ++ ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // The return address pushed by save_live_registers will be patched ++ // later with the throwing pc. The correct value is not available ++ // now because loading it from memory would destroy registers. ++ ++ // NB: The SP at this point must be the SP of the method that is ++ // being deoptimized. Deoptimization assumes that the frame created ++ // here by save_live_registers is immediately below the method's SP. ++ // This is a somewhat fragile mechanism. ++ ++ // Save everything in sight. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ mv(xcpool, Deoptimization::Unpack_exception); // callee-saved ++ ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(x13, Address(fp, frame::return_addr_offset * wordSize)); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(x10); ++ ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, no_pending_exception); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ ++ __ bind(cont); ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++ // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++ ++ // fetch_unroll_info needs to call last_java_frame(). 
++ ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(xthread, ++ JavaThread::last_Java_fp_offset())); ++ __ beqz(t0, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock* into x15 ++ __ mv(x15, x10); ++ ++ __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ mv(t0, Deoptimization::Unpack_exception); ++ __ bne(xcpool, t0, noException); // Was exception pending? ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ ld(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ __ verify_oop(x10); ++ ++ // Overwrite the result registers with the exception results. ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ __ bind(noException); ++ ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. ++ reg_saver.restore_result_registers(masm); ++ ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // Pop deoptimized frame ++ __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, Address(sp, 0)); ++ __ ld(ra, Address(sp, wordSize)); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. 
++ if (UseStackBanging) { ++ __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x9, x12); ++ } ++#endif ++ // Load address of array of frame pcs into x12 ++ __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ ++ // Load address of array of frame sizes into x14 ++ __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++ ++ // Load counter into x13 ++ __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ const Register sender_sp = x16; ++ ++ __ mv(sender_sp, sp); ++ __ lwu(x9, Address(x15, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ sub(sp, sp, x9); ++ ++ // Push interpreter frames in a loop ++ __ mv(t0, (uint64_t)0xDEADDEAD); // Make a recognizable pattern ++ __ mv(t1, t0); ++ Label loop; ++ __ bind(loop); ++ __ ld(x9, Address(x14, 0)); // Load frame size ++ __ addi(x14, x14, wordSize); ++ __ sub(x9, x9, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Load pc ++ __ addi(x12, x12, wordSize); ++ __ enter(); // Save old & set new fp ++ __ sub(sp, sp, x9); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ addi(x13, x13, -1); // Decrement counter ++ __ bnez(x13, loop); ++ ++ // Re-push self-frame ++ __ ld(ra, Address(x12)); ++ __ enter(); ++ ++ // Allocate a full sized register save area. We subtract 2 because ++ // enter() just pushed 2 words ++ __ sub(sp, sp, (frame_size_in_words - 2) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++ ++ // Use fp because the frames look interpreted now ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ __ mv(c_rarg0, xthread); ++ __ mv(c_rarg1, xcpool); // second arg: exec_mode ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); ++ ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap( frame_size_in_words, 0 )); ++ ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10))); ++ __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10))); ++ ++ // Pop self-frame. 
++ __ leave(); // Epilog ++ ++ // Jump to interpreter ++ __ ret(); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ assert(_deopt_blob != NULL, "create deoptimization blob fail!"); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. We get here with a return address in RA ++ // and sp should be 16 byte aligned ++ // push fp and retaddr by hand ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp, 0)); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ addiw(c_rarg1, j_rarg0, 0); ++ ++ // we need to set the past SP to the stack pointer of the stub frame ++ // and the pc to the address where this runtime call will return ++ // although actually any pc in this code blob will do). ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode) ++ // ++ // n.b. 3 gp args, 0 fp args, integral return type ++ ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ int32_t offset = 0; ++ __ la_patchable(t0, ++ RuntimeAddress(CAST_FROM_FN_PTR(address, ++ Deoptimization::uncommon_trap)), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ assert_cond(oop_maps != NULL && map != NULL); ++ ++ // location of fp is known implicitly by the frame sender code ++ ++ oop_maps->add_gc_map(__ pc() - start, map); ++ ++ __ reset_last_Java_frame(false); ++ ++ // move UnrollBlock* into x14 ++ __ mv(x14, x10); ++ ++#ifdef ASSERT ++ { Label L; ++ __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ __ mvw(t1, Deoptimization::Unpack_uncommon_trap); ++ __ beq(t0, t1, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ ++ __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog! 
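++  // (framesize above is counted in 32-bit stack slots; shifting by
++  // LogBytesPerInt converts it to a byte count for sp.)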
++ ++ // Pop deoptimized frame (int) ++ __ lwu(x12, Address(x14, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ sub(x12, x12, 2 * wordSize); ++ __ add(sp, sp, x12); ++ __ ld(fp, sp, 0); ++ __ ld(ra, sp, wordSize); ++ __ addi(sp, sp, 2 * wordSize); ++ // RA should now be the return address to the caller (3) frame ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ if (UseStackBanging) { ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(x11, x12); ++ } ++#endif ++ ++ // Load address of array of frame pcs into x12 (address*) ++ __ ld(x12, Address(x14, ++ Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ ++ // Load address of array of frame sizes into x15 (intptr_t*) ++ __ ld(x15, Address(x14, ++ Deoptimization::UnrollBlock:: ++ frame_sizes_offset_in_bytes())); ++ ++ // Counter ++ __ lwu(x13, Address(x14, ++ Deoptimization::UnrollBlock:: ++ number_of_frames_offset_in_bytes())); // (int) ++ ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. ++ ++ const Register sender_sp = t1; // temporary register ++ ++ __ lwu(x11, Address(x14, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); // (int) ++ __ mv(sender_sp, sp); ++ __ sub(sp, sp, x11); ++ ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(x11, Address(x15, 0)); // Load frame size ++ __ sub(x11, x11, 2 * wordSize); // We'll push pc and fp by hand ++ __ ld(ra, Address(x12, 0)); // Save return address ++ __ enter(); // and old fp & set new fp ++ __ sub(sp, sp, x11); // Prolog ++ __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ mv(sender_sp, sp); // Pass sender_sp to next frame ++ __ add(x15, x15, wordSize); // Bump array pointer (sizes) ++ __ add(x12, x12, wordSize); // Bump array pointer (pcs) ++ __ subw(x13, x13, 1); // Decrement counter ++ __ bgtz(x13, loop); ++ __ ld(ra, Address(x12, 0)); // save final return address ++ // Re-push self-frame ++ __ enter(); // & old fp & set new fp ++ ++ // Use fp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode) ++ // ++ ++ // n.b. 
2 gp args, 0 fp args, integral return type ++ ++ // sp should already be aligned ++ __ mv(c_rarg0, xthread); ++ __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); ++ offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset); ++ __ jalr(x1, t0, offset); ++ ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog ++ ++ // Jump to interpreter ++ __ ret(); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; ++ ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); ++ ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words = -1; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); ++ ++ // Save Integer and Float registers. ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. ++ ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, x18 is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset())); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ } ++ ++ // Do the call ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(call_ptr), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ ++ oop_maps->add_gc_map( __ pc() - start, map); ++ ++ Label noException; ++ ++ __ reset_last_Java_frame(false); ++ ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, noException); ++ ++ // Exception pending ++ ++ reg_saver.restore_live_registers(masm); ++ ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld(t0, Address(fp, frame::return_addr_offset * wordSize)); ++ __ bne(x18, t0, no_adjust); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ // See NativeInstruction::is_lwu_to_zr() ++ __ lwu(t0, Address(x18)); ++ __ andi(t1, t0, 0b0000011); ++ __ mv(t2, 0b0000011); ++ __ bne(t1, t2, bail); // 0-6:0b0000011 ++ __ srli(t1, t0, 7); ++ __ andi(t1, t1, 0b00000); ++ __ bnez(t1, bail); // 7-11:0b00000 ++ __ srli(t1, t0, 12); ++ __ andi(t1, t1, 0b110); ++ __ mv(t2, 0b110); ++ __ bne(t1, t2, bail); // 12-14:0b110 ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ add(x18, x18, NativeInstruction::instruction_size); ++ __ sd(x18, Address(fp, frame::return_addr_offset * wordSize)); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. ++ ++ reg_saver.restore_live_registers(masm); ++ __ ret(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ CodeBuffer buffer(name, 1000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); ++ ++ int frame_size_in_words = -1; ++ RegisterSaver reg_saver(false /* save_vectors */); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ ++ map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words); ++ ++ int frame_complete = __ offset(); ++ ++ { ++ Label retaddr; ++ __ set_last_Java_frame(sp, noreg, retaddr, t0); ++ ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(destination), offset); ++ __ jalr(x1, t0, offset); ++ __ bind(retaddr); ++ } ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. 
++ ++ oop_maps->add_gc_map( __ offset() - start, map); ++ ++ // x10 contains the address we are going to jump to assuming no exception got installed ++ ++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, pending); ++ ++ // get the returned Method* ++ __ get_vm_result_2(xmethod, xthread); ++ __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod))); ++ ++ // x10 is where we want to jump, overwrite t0 which is saved and temporary ++ __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0))); ++ reg_saver.restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go. ++ ++ __ jr(t0); ++ ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ reg_saver.restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ ++ __ sd(zr, Address(xthread, JavaThread::vm_result_offset())); ++ ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ // return the blob ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); ++} ++ ++#ifdef COMPILER2 ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in riscv.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. ++// ++// Arguments: ++// x10: exception oop ++// x13: exception pc ++// ++// Results: ++// x10: exception oop ++// x13: exception pc in caller ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers x10, x13, x12, x14, x15, t0 are not callee saved. ++// ++ ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(R13_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R10_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(R12_num), ""); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("exception_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ assert_cond(masm != NULL); ++ ++ // TODO check various assumptions made here ++ // ++ // make sure we do so before running this ++ ++ address start = __ pc(); ++ ++ // push fp and retaddr by hand ++ // Exception pc is 'return address' for stack walker ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, wordSize)); ++ __ sd(fp, Address(sp)); ++ // there are no callee save registers and we don't expect an ++ // arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ // Store exception in Thread object. 
We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ __ sd(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ __ sd(x13, Address(xthread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. ++ // ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) ++ // ++ // n.b. 1 gp arg, 0 fp args, integral return type ++ ++ // the stack should always be aligned ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, noreg, the_pc, t0); ++ __ mv(c_rarg0, xthread); ++ int32_t offset = 0; ++ __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset); ++ __ jalr(x1, t0, offset); ++ ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. ++ ++ OopMapSet* oop_maps = new OopMapSet(); ++ assert_cond(oop_maps != NULL); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ ++ __ reset_last_Java_frame(false); ++ ++ // Restore callee-saved registers ++ ++ // fp is an implicitly saved callee saved register (i.e. the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ // and we dont' expect an arg reg save area ++ __ ld(fp, Address(sp)); ++ __ ld(x13, Address(sp, wordSize)); ++ __ addi(sp, sp , 2 * wordSize); ++ ++ // x10: exception handler ++ ++ // We have a handler in x10 (could be deopt blob). ++ __ mv(t0, x10); ++ ++ // Get the exception oop ++ __ ld(x10, Address(xthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(x14, Address(xthread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset())); ++ __ sd(zr, Address(xthread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(zr, Address(xthread, JavaThread::exception_oop_offset())); ++ ++ // x10: exception oop ++ // t0: exception handler ++ // x14: exception pc ++ // Jump to handler ++ ++ __ jr(t0); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +new file mode 100644 +index 000000000..c5b3b094c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp +@@ -0,0 +1,3743 @@ ++/* ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_riscv.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#undef __ ++#define __ _masm-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ __ la(t1, ExternalAddress((address)&counter)); ++ __ lwu(t0, Address(t1, 0)); ++ __ addiw(t0, t0, 1); ++ __ sw(t0, Address(t1, 0)); ++ } ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif ++ ++ // Call stubs are used to call Java from C ++ // ++ // Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // c_rarg6: parameter size (in words) int ++ // c_rarg7: thread Thread* ++ // ++ // There is no return from the stub itself as any Java result ++ // is written to result ++ // ++ // we save x1 (ra) as the return PC at the base of the frame and ++ // link x8 (fp) below it as the frame pointer installing sp (x2) ++ // into fp. ++ // ++ // we save x10-x17, which accounts for all the c arguments. ++ // ++ // TODO: strictly do we need to save them all? they are treated as ++ // volatile by C so could we omit saving the ones we are going to ++ // place in global registers (thread? method?) or those we only use ++ // during setup of the Java call? 
++ // ++ // we don't need to save x5 which C uses as an indirect result location ++ // return register. ++ // ++ // we don't need to save x6-x7 and x28-x31 which both C and Java treat as ++ // volatile ++ // ++ // we save x9, x18-x27, f8-f9, and f18-f27 which Java uses as temporary ++ // registers and C expects to be callee-save ++ // ++ // so the stub frame looks like this when we enter Java code ++ // ++ // [ return_from_Java ] <--- sp ++ // [ argument word n ] ++ // ... ++ // -34 [ argument word 1 ] ++ // -33 [ saved f27 ] <--- sp_after_call ++ // -32 [ saved f26 ] ++ // -31 [ saved f25 ] ++ // -30 [ saved f24 ] ++ // -29 [ saved f23 ] ++ // -28 [ saved f22 ] ++ // -27 [ saved f21 ] ++ // -26 [ saved f20 ] ++ // -25 [ saved f19 ] ++ // -24 [ saved f18 ] ++ // -23 [ saved f9 ] ++ // -22 [ saved f8 ] ++ // -21 [ saved x27 ] ++ // -20 [ saved x26 ] ++ // -19 [ saved x25 ] ++ // -18 [ saved x24 ] ++ // -17 [ saved x23 ] ++ // -16 [ saved x22 ] ++ // -15 [ saved x21 ] ++ // -14 [ saved x20 ] ++ // -13 [ saved x19 ] ++ // -12 [ saved x18 ] ++ // -11 [ saved x9 ] ++ // -10 [ call wrapper (x10) ] ++ // -9 [ result (x11) ] ++ // -8 [ result type (x12) ] ++ // -7 [ method (x13) ] ++ // -6 [ entry point (x14) ] ++ // -5 [ parameters (x15) ] ++ // -4 [ parameter size (x16) ] ++ // -3 [ thread (x17) ] ++ // -2 [ saved fp (x8) ] ++ // -1 [ saved ra (x1) ] ++ // 0 [ ] <--- fp == saved sp (x2) ++ ++ // Call stub stack layout word offsets from fp ++ enum call_stub_layout { ++ sp_after_call_off = -33, ++ ++ f27_off = -33, ++ f26_off = -32, ++ f25_off = -31, ++ f24_off = -30, ++ f23_off = -29, ++ f22_off = -28, ++ f21_off = -27, ++ f20_off = -26, ++ f19_off = -25, ++ f18_off = -24, ++ f9_off = -23, ++ f8_off = -22, ++ ++ x27_off = -21, ++ x26_off = -20, ++ x25_off = -19, ++ x24_off = -18, ++ x23_off = -17, ++ x22_off = -16, ++ x21_off = -15, ++ x20_off = -14, ++ x19_off = -13, ++ x18_off = -12, ++ x9_off = -11, ++ ++ call_wrapper_off = -10, ++ result_off = -9, ++ result_type_off = -8, ++ method_off = -7, ++ entry_point_off = -6, ++ parameters_off = -5, ++ parameter_size_off = -4, ++ thread_off = -3, ++ fp_f = -2, ++ retaddr_off = -1, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ const Address sp_after_call (fp, sp_after_call_off * wordSize); ++ ++ const Address call_wrapper (fp, call_wrapper_off * wordSize); ++ const Address result (fp, result_off * wordSize); ++ const Address result_type (fp, result_type_off * wordSize); ++ const Address method (fp, method_off * wordSize); ++ const Address entry_point (fp, entry_point_off * wordSize); ++ const Address parameters (fp, parameters_off * wordSize); ++ const Address parameter_size(fp, parameter_size_off * wordSize); ++ ++ const Address thread (fp, thread_off * wordSize); ++ ++ const Address f27_save (fp, f27_off * wordSize); ++ const Address f26_save (fp, f26_off * wordSize); ++ const Address f25_save (fp, f25_off * wordSize); ++ const Address f24_save (fp, f24_off * wordSize); ++ const Address f23_save (fp, f23_off * wordSize); ++ const Address f22_save (fp, f22_off * wordSize); ++ const Address f21_save (fp, f21_off * wordSize); ++ const Address f20_save (fp, f20_off * wordSize); ++ const Address f19_save (fp, f19_off * wordSize); ++ const Address f18_save (fp, 
f18_off * wordSize); ++ const Address f9_save (fp, f9_off * wordSize); ++ const Address f8_save (fp, f8_off * wordSize); ++ ++ const Address x27_save (fp, x27_off * wordSize); ++ const Address x26_save (fp, x26_off * wordSize); ++ const Address x25_save (fp, x25_off * wordSize); ++ const Address x24_save (fp, x24_off * wordSize); ++ const Address x23_save (fp, x23_off * wordSize); ++ const Address x22_save (fp, x22_off * wordSize); ++ const Address x21_save (fp, x21_off * wordSize); ++ const Address x20_save (fp, x20_off * wordSize); ++ const Address x19_save (fp, x19_off * wordSize); ++ const Address x18_save (fp, x18_off * wordSize); ++ ++ const Address x9_save (fp, x9_off * wordSize); ++ ++ // stub code ++ ++ address riscv_entry = __ pc(); ++ ++ // set up frame and move sp to end of save area ++ __ enter(); ++ __ addi(sp, fp, sp_after_call_off * wordSize); ++ ++ // save register parameters and Java temporary/global registers ++ // n.b. we save thread even though it gets installed in ++ // xthread because we want to sanity check tp later ++ __ sd(c_rarg7, thread); ++ __ sw(c_rarg6, parameter_size); ++ __ sd(c_rarg5, parameters); ++ __ sd(c_rarg4, entry_point); ++ __ sd(c_rarg3, method); ++ __ sd(c_rarg2, result_type); ++ __ sd(c_rarg1, result); ++ __ sd(c_rarg0, call_wrapper); ++ ++ __ sd(x9, x9_save); ++ ++ __ sd(x18, x18_save); ++ __ sd(x19, x19_save); ++ __ sd(x20, x20_save); ++ __ sd(x21, x21_save); ++ __ sd(x22, x22_save); ++ __ sd(x23, x23_save); ++ __ sd(x24, x24_save); ++ __ sd(x25, x25_save); ++ __ sd(x26, x26_save); ++ __ sd(x27, x27_save); ++ ++ __ fsd(f8, f8_save); ++ __ fsd(f9, f9_save); ++ __ fsd(f18, f18_save); ++ __ fsd(f19, f19_save); ++ __ fsd(f20, f20_save); ++ __ fsd(f21, f21_save); ++ __ fsd(f22, f22_save); ++ __ fsd(f23, f23_save); ++ __ fsd(f24, f24_save); ++ __ fsd(f25, f25_save); ++ __ fsd(f26, f26_save); ++ __ fsd(f27, f27_save); ++ ++ // install Java thread in global register now we have saved ++ // whatever value it held ++ __ mv(xthread, c_rarg7); ++ ++ // And method ++ __ mv(xmethod, c_rarg3); ++ ++ // set up the heapbase register ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset()))); ++ __ beqz(t0, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ BIND(L); ++ } ++#endif ++ // pass parameters if any ++ __ mv(esp, sp); ++ __ slli(t0, c_rarg6, LogBytesPerWord); ++ __ sub(t0, sp, t0); // Move SP out of the way ++ __ andi(sp, t0, -2 * wordSize); ++ ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ // parameter count is still in c_rarg6 ++ // and parameter pointer identifying param 1 is in c_rarg5 ++ __ beqz(c_rarg6, parameters_done); ++ ++ address loop = __ pc(); ++ __ ld(t0, c_rarg5, 0); ++ __ addi(c_rarg5, c_rarg5, wordSize); ++ __ addi(c_rarg6, c_rarg6, -1); ++ __ push_reg(t0); ++ __ bgtz(c_rarg6, loop); ++ ++ __ BIND(parameters_done); ++ ++ // call Java entry -- passing methdoOop, and current sp ++ // xmethod: Method* ++ // x30: sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ mv(x30, sp); ++ __ jalr(c_rarg4); ++ ++ // save current address for use by exception handling code ++ ++ return_address = __ pc(); ++ ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ // n.b. 
this assumes Java returns an integral result in x10 ++ // and a floating result in j_farg0 ++ __ ld(j_rarg2, result); ++ Label is_long, is_float, is_double, exit; ++ __ ld(j_rarg1, result_type); ++ __ mv(t0, (u1)T_OBJECT); ++ __ beq(j_rarg1, t0, is_long); ++ __ mv(t0, (u1)T_LONG); ++ __ beq(j_rarg1, t0, is_long); ++ __ mv(t0, (u1)T_FLOAT); ++ __ beq(j_rarg1, t0, is_float); ++ __ mv(t0, (u1)T_DOUBLE); ++ __ beq(j_rarg1, t0, is_double); ++ ++ // handle T_INT case ++ __ sw(x10, Address(j_rarg2)); ++ ++ __ BIND(exit); ++ ++ // pop parameters ++ __ addi(esp, fp, sp_after_call_off * wordSize); ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ BIND(S); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ BIND(L); ++ } ++#endif ++ ++ // restore callee-save registers ++ __ fld(f27, f27_save); ++ __ fld(f26, f26_save); ++ __ fld(f25, f25_save); ++ __ fld(f24, f24_save); ++ __ fld(f23, f23_save); ++ __ fld(f22, f22_save); ++ __ fld(f21, f21_save); ++ __ fld(f20, f20_save); ++ __ fld(f19, f19_save); ++ __ fld(f18, f18_save); ++ __ fld(f9, f9_save); ++ __ fld(f8, f8_save); ++ ++ __ ld(x27, x27_save); ++ __ ld(x26, x26_save); ++ __ ld(x25, x25_save); ++ __ ld(x24, x24_save); ++ __ ld(x23, x23_save); ++ __ ld(x22, x22_save); ++ __ ld(x21, x21_save); ++ __ ld(x20, x20_save); ++ __ ld(x19, x19_save); ++ __ ld(x18, x18_save); ++ ++ __ ld(x9, x9_save); ++ ++ __ ld(c_rarg0, call_wrapper); ++ __ ld(c_rarg1, result); ++ __ ld(c_rarg2, result_type); ++ __ ld(c_rarg3, method); ++ __ ld(c_rarg4, entry_point); ++ __ ld(c_rarg5, parameters); ++ __ ld(c_rarg6, parameter_size); ++ __ ld(c_rarg7, thread); ++ ++ // leave frame and return to caller ++ __ leave(); ++ __ ret(); ++ ++ // handle return types different from T_INT ++ ++ __ BIND(is_long); ++ __ sd(x10, Address(j_rarg2, 0)); ++ __ j(exit); ++ ++ __ BIND(is_float); ++ __ fsw(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ __ BIND(is_double); ++ __ fsd(j_farg0, Address(j_rarg2, 0), t0); ++ __ j(exit); ++ ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. 
++ // ++ // x10: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ // same as in generate_call_stub(): ++ const Address thread(fp, thread_off * wordSize); ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L, S; ++ __ ld(t0, thread); ++ __ bne(xthread, t0, S); ++ __ get_thread(t0); ++ __ beq(xthread, t0, L); ++ __ bind(S); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ ++ // set pending exception ++ __ verify_oop(x10); ++ ++ __ sd(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ mv(t0, (address)__FILE__); ++ __ sd(t0, Address(xthread, Thread::exception_file_offset())); ++ __ mv(t0, (int)__LINE__); ++ __ sw(t0, Address(xthread, Thread::exception_line_offset())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ j(StubRoutines::_call_stub_return_address); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // x10: exception ++ // x13: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be in RA !! ++ ++ // NOTE: this is always used as a jump target within generated code ++ // so it just needs to be generated code with no x86 prolog ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); ++ ++ // Upon entry, RA points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into x9 ++ ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in x10 and caller pc (ret address) ++ // in x11. n.b. the caller pc is in ra, unlike x86 where it is on ++ // the stack. ++ __ mv(c_rarg1, ra); ++ // ra will be trashed by the VM call so we move it to x9 ++ // (callee-saved) because we also need to pass it to the handler ++ // returned by this call. ++ __ mv(x9, ra); ++ BLOCK_COMMENT("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, c_rarg1); ++ // we should not really care that ra is no longer the callee ++ // address. we saved the value the handler needs in x9 so we can ++ // just copy it to x13. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore ra here to satisfy that assert. 
++ __ mv(ra, x9); ++ // setup x10 & x13 & clear pending exception ++ __ mv(x13, x9); ++ __ mv(x9, x10); ++ __ ld(x10, Address(xthread, Thread::pending_exception_offset())); ++ __ sd(zr, Address(xthread, Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler ++ // x10: exception ++ // x13: throwing pc ++ // x9: exception handler ++ __ verify_oop(x10); ++ __ jr(x9); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // x10: oop to verify ++ // t0: error message ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved ra ++ // [tos + 3]: saved t1 ++ // [tos + 4]: saved x10 ++ // [tos + 5]: saved t0 ++ address generate_verify_oop() { ++ ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ ++ Label exit, error; ++ ++ __ push_reg(RegSet::of(c_rarg2, c_rarg3), sp); // save c_rarg2 and c_rarg3 ++ ++ __ la(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); ++ __ ld(c_rarg3, Address(c_rarg2)); ++ __ add(c_rarg3, c_rarg3, 1); ++ __ sd(c_rarg3, Address(c_rarg2)); ++ ++ // object is in x10 ++ // make sure object is 'reasonable' ++ __ beqz(x10, exit); // if obj is NULL it is OK ++ ++ // Check if the oop is in the right area of memory ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andr(c_rarg2, x10, c_rarg3); ++ __ mv(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ ++ // Compare c_rarg2 and c_rarg3 ++ __ bne(c_rarg2, c_rarg3, error); ++ ++ // make sure klass is 'reasonable', which is not zero. ++ __ load_klass(x10, x10); // get klass ++ __ beqz(x10, error); // if klass is NULL it is broken ++ ++ // return if everything seems ok ++ __ bind(exit); ++ ++ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ __ ret(); ++ ++ // handle errors ++ __ bind(error); ++ __ pop_reg(RegSet::of(c_rarg2, c_rarg3), sp); // pop c_rarg2 and c_rarg3 ++ ++ __ push_reg(RegSet::range(x0, x31), sp); ++ // prepare parameters for debug64, c_rarg0: address of error message, ++ // c_rarg1: return address, c_rarg2: address of regs on stack ++ __ mv(c_rarg0, t0); // pass address of error message ++ __ mv(c_rarg1, ra); // pass return address ++ __ mv(c_rarg2, sp); // pass address of regs on stack ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, CAST_FROM_FN_PTR(address, MacroAssembler::debug64), offset); ++ __ jalr(x1, t0, offset); ++ ++ return start; ++ } ++ ++ // The inner part of zero_words(). ++ // ++ // Inputs: ++ // x28: the HeapWord-aligned base address of an array to zero. ++ // x29: the count in HeapWords, x29 > 0. ++ // ++ // Returns x28 and x29, adjusted for the caller to clear. ++ // x28: the base address of the tail of words left to clear. ++ // x29: the number of words in the tail. ++ // x29 < MacroAssembler::zero_words_block_size. ++ ++ address generate_zero_blocks() { ++ Label done; ++ ++ const Register base = x28, cnt = x29; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "zero_blocks"); ++ address start = __ pc(); ++ ++ { ++ // Clear the remaining blocks. 
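++      // Roughly equivalent C for this block (a sketch only; the stores below are
++      // the unrolled body, zero_words_block_size words per iteration):
++      //   while (cnt >= zero_words_block_size) {
++      //     for (int i = 0; i < zero_words_block_size; i++) { *base++ = 0; }
++      //     cnt -= zero_words_block_size;
++      //   }
++      //   // on exit, base/cnt describe the tail, cnt < zero_words_block_size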
++ Label loop; ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bltz(cnt, done); ++ __ bind(loop); ++ for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) { ++ __ sd(zr, Address(base, 0)); ++ __ add(base, base, 8); ++ } ++ __ sub(cnt, cnt, MacroAssembler::zero_words_block_size); ++ __ bgez(cnt, loop); ++ __ bind(done); ++ __ add(cnt, cnt, MacroAssembler::zero_words_block_size); ++ } ++ ++ __ ret(); ++ ++ return start; ++ } ++ ++ typedef void (MacroAssembler::*copy_insn)(Register R1, Register R2, const int32_t offset); ++ ++ void copy_by_step(RegSet tmp_regs, Register src, Register dst, ++ unsigned unroll_factor, int unit) { ++ unsigned char regs[32]; ++ int offset = unit < 0 ? unit : 0; ++ ++ // Scan bitset to get tmp regs ++ unsigned int regsSize = 0; ++ unsigned bitset = tmp_regs.bits(); ++ ++ assert(((bitset & (1 << (src->encoding()))) == 0), "src should not in tmp regs"); ++ assert(((bitset & (1 << (dst->encoding()))) == 0), "dst should not in tmp regs"); ++ ++ for (int reg = 31; reg >= 0; reg--) { ++ if ((1U << 31) & bitset) { ++ regs[regsSize++] = reg; ++ } ++ bitset <<= 1; ++ } ++ ++ copy_insn ld_arr = NULL, st_arr = NULL; ++ switch (abs(unit)) { ++ case 1 : ++ ld_arr = (copy_insn)&MacroAssembler::lbu; ++ st_arr = (copy_insn)&MacroAssembler::sb; ++ break; ++ case BytesPerShort : ++ ld_arr = (copy_insn)&MacroAssembler::lhu; ++ st_arr = (copy_insn)&MacroAssembler::sh; ++ break; ++ case BytesPerInt : ++ ld_arr = (copy_insn)&MacroAssembler::lwu; ++ st_arr = (copy_insn)&MacroAssembler::sw; ++ break; ++ case BytesPerLong : ++ ld_arr = (copy_insn)&MacroAssembler::ld; ++ st_arr = (copy_insn)&MacroAssembler::sd; ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++ ++ for (unsigned i = 0; i < unroll_factor; i++) { ++ (_masm->*ld_arr)(as_Register(regs[0]), src, i * unit + offset); ++ (_masm->*st_arr)(as_Register(regs[0]), dst, i * unit + offset); ++ } ++ ++ __ addi(src, src, unroll_factor * unit); ++ __ addi(dst, dst, unroll_factor * unit); ++ } ++ ++ void copy_tail(Register src, Register dst, Register count_in_bytes, Register tmp, ++ int ele_size, unsigned align_unit) { ++ bool is_backwards = ele_size < 0; ++ size_t granularity = uabs(ele_size); ++ for (unsigned unit = (align_unit >> 1); unit >= granularity; unit >>= 1) { ++ int offset = is_backwards ? (int)(-unit) : unit; ++ Label exit; ++ __ andi(tmp, count_in_bytes, unit); ++ __ beqz(tmp, exit); ++ copy_by_step(RegSet::of(tmp), src, dst, /* unroll_factor */ 1, offset); ++ __ bind(exit); ++ } ++ } ++ ++ void copy_loop8(Register src, Register dst, Register count_in_bytes, Register tmp, ++ int step, Label *Lcopy_small, Register loopsize = noreg) { ++ size_t granularity = uabs(step); ++ RegSet tmp_regs = RegSet::range(x13, x16); ++ assert_different_registers(src, dst, count_in_bytes, tmp); ++ ++ Label loop, copy2, copy1, finish; ++ if (loopsize == noreg) { ++ loopsize = t1; ++ __ mv(loopsize, 8 * granularity); ++ } ++ ++ // Cyclic copy with 8*step. 
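++    // In effect (a sketch; |step| is the element size in bytes, its sign selects
++    // the copy direction, and loopsize defaults to 8 * |step|):
++    //   do {
++    //     copy 8 elements; src += 8 * step; dst += 8 * step;
++    //     count_in_bytes -= 8 * |step|;
++    //   } while (count_in_bytes >= loopsize);
++    // afterwards, bits of count_in_bytes select the 4-, 2- and 1-element tails.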
++ __ bind(loop); ++ { ++ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 8, step); ++ __ sub(count_in_bytes, count_in_bytes, 8 * granularity); ++ __ bge(count_in_bytes, loopsize, loop); ++ } ++ ++ if (Lcopy_small != NULL) { ++ __ bind(*Lcopy_small); ++ } ++ ++ // copy memory smaller than step * 8 bytes ++ __ andi(tmp, count_in_bytes, granularity << 2); ++ __ beqz(tmp, copy2); ++ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 4, step); ++ ++ __ bind(copy2); ++ __ andi(tmp, count_in_bytes, granularity << 1); ++ __ beqz(tmp, copy1); ++ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 2, step); ++ ++ __ bind(copy1); ++ __ andi(tmp, count_in_bytes, granularity); ++ __ beqz(tmp, finish); ++ copy_by_step(tmp_regs, src, dst, /* unroll_factor */ 1, step); ++ ++ __ bind(finish); ++ } ++ ++ // Cyclic copy with one step. ++ void copy_loop1(Register src, Register dst, Register count_in_bytes, int step, Register loopsize = noreg) { ++ size_t granularity = uabs(step); ++ Label loop1; ++ if (loopsize == noreg) { ++ loopsize = t0; ++ __ mv(loopsize, granularity); ++ } ++ ++ __ bind(loop1); ++ { ++ copy_by_step(RegSet::of(x13), src, dst, /* unroll_factor */ 1, step); ++ __ sub(count_in_bytes, count_in_bytes, granularity); ++ __ bge(count_in_bytes, loopsize, loop1); ++ } ++ } ++ ++ void align_unit(Register src, Register dst, Register count_in_bytes, ++ unsigned unit, bool is_backwards) { ++ Label skip; ++ __ andi(t0, dst, unit); ++ __ beqz(t0, skip); ++ copy_by_step(RegSet::of(t0), src, dst, 1, is_backwards ? -unit : unit); ++ __ sub(count_in_bytes, count_in_bytes, unit); ++ __ bind(skip); ++ } ++ ++ void copy_memory(bool is_align, Register s, Register d, Register count_in_elements, ++ Register tmp, int ele_step) { ++ ++ bool is_backwards = ele_step < 0; ++ unsigned int granularity = uabs(ele_step); ++ Label Lcopy_small, Ldone, Lcopy_ele, Laligned; ++ const Register count_in_bytes = x31, src = x28, dst = x29; ++ assert_different_registers(src, dst, count_in_elements, count_in_bytes, tmp, t1); ++ __ slli(count_in_bytes, count_in_elements, exact_log2(granularity)); ++ __ add(src, s, is_backwards ? count_in_bytes : zr); ++ __ add(dst, d, is_backwards ? count_in_bytes : zr); ++ ++ // if count_in_elements < 8, copy_small ++ __ mv(t0, 8); ++ if (is_align && granularity < BytesPerLong) { ++ __ blt(count_in_bytes, t0, Lcopy_small); ++ } else { ++ __ blt(count_in_elements, t0, Lcopy_small); ++ } ++ ++ if (granularity < BytesPerLong) { ++ Label Lcopy_aligned[3]; ++ Label Lalign8; ++ if (!is_align) { ++ Label Lalign_and_copy; ++ __ mv(t0, EagerArrayCopyThreshold); ++ __ blt(count_in_bytes, t0, Lalign_and_copy); ++ // Align dst to 8. 
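++      // Each align_unit(unit) call peels at most one element (a sketch):
++      //   if (dst & unit) { copy one unit-sized element; count_in_bytes -= unit; }
++      // Applying it for unit = granularity, ..., 4 leaves dst 8-byte aligned
++      // before the doubleword loops below.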
++ for (unsigned unit = granularity; unit <= 4; unit <<= 1) { ++ align_unit(src, dst, count_in_bytes, unit, is_backwards); ++ } ++ ++ Register shr = x30, shl = x7, tmp1 = x13; ++ ++ __ andi(shr, src, 0x7); ++ __ beqz(shr, Lalign8); ++ { ++ // calculaute the shift for store doubleword ++ __ slli(shr, shr, 3); ++ __ sub(shl, shr, 64); ++ __ sub(shl, zr, shl); ++ ++ // alsrc: previous position of src octal alignment ++ Register alsrc = t1; ++ __ andi(alsrc, src, -8); ++ ++ // move src to tail ++ __ andi(t0, count_in_bytes, -8); ++ if (is_backwards) { ++ __ sub(src, src, t0); ++ } else { ++ __ add(src, src, t0); ++ } ++ ++ // prepare for copy_dstaligned_loop ++ __ ld(tmp1, alsrc, 0); ++ dst_aligned_copy_32bytes_loop(alsrc, dst, shr, shl, count_in_bytes, is_backwards); ++ __ mv(x17, 8); ++ __ blt(count_in_bytes, x17, Lcopy_small); ++ dst_aligned_copy_8bytes_loop(alsrc, dst, shr, shl, count_in_bytes, x17, is_backwards); ++ __ j(Lcopy_small); ++ } ++ __ j(Ldone); ++ __ bind(Lalign_and_copy); ++ ++ // Check src and dst could be 8/4/2 algined at the same time. If could, align the ++ // memory and copy by 8/4/2. ++ __ xorr(t1, src, dst); ++ ++ for (unsigned alignment = granularity << 1; alignment <= 8; alignment <<= 1) { ++ Label skip; ++ unsigned int unit = alignment >> 1; ++ // Check src and dst could be aligned to checkbyte at the same time ++ // if copy from src to dst. If couldn't, jump to label not_aligned. ++ __ andi(t0, t1, alignment - 1); ++ __ bnez(t0, Lcopy_aligned[exact_log2(unit)]); ++ // Align src and dst to unit. ++ align_unit(src, dst, count_in_bytes, unit, is_backwards); ++ } ++ } ++ __ bind(Lalign8); ++ for (unsigned step_size = 8; step_size > granularity; step_size >>= 1) { ++ // Copy memory by steps, which has been aligned to step_size. ++ Label loop8, Ltail; ++ int step = is_backwards ? (-step_size) : step_size; ++ if (!(step_size == 8 && is_align)) { // which has load 8 to t0 before ++ // Check whether the memory size is smaller than step_size. ++ __ mv(t0, step_size); ++ __ blt(count_in_bytes, t0, Ltail); ++ } ++ const Register eight_step = t1; ++ __ mv(eight_step, step_size * 8); ++ __ bge(count_in_bytes, eight_step, loop8); ++ // If memory is less than 8*step_size bytes, loop by step. ++ copy_loop1(src, dst, count_in_bytes, step, t0); ++ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); ++ __ j(Ldone); ++ ++ __ bind(loop8); ++ // If memory is greater than or equal to 8*step_size bytes, loop by step*8. ++ copy_loop8(src, dst, count_in_bytes, tmp, step, NULL, eight_step); ++ __ bind(Ltail); ++ copy_tail(src, dst, count_in_bytes, tmp, ele_step, step_size); ++ __ j(Ldone); ++ ++ __ bind(Lcopy_aligned[exact_log2(step_size >> 1)]); ++ } ++ } ++ // If the ele_step is greater than 8, or the memory src and dst cannot ++ // be aligned with a number greater than the value of step. ++ // Cyclic copy with 8*ele_step. ++ copy_loop8(src, dst, count_in_bytes, tmp, ele_step, &Lcopy_small, noreg); ++ ++ __ bind(Ldone); ++ } ++ ++ void dst_aligned_copy_32bytes_loop(Register alsrc, Register dst, ++ Register shr, Register shl, ++ Register count_in_bytes, bool is_backwards) { ++ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16, thirty_two = x17; ++ const Register sll_reg1 = is_backwards ? tmp1 : tmp2, ++ srl_reg1 = is_backwards ? tmp2 : tmp1, ++ sll_reg2 = is_backwards ? tmp2 : tmp3, ++ srl_reg2 = is_backwards ? tmp3 : tmp2, ++ sll_reg3 = is_backwards ? tmp3 : tmp4, ++ srl_reg3 = is_backwards ? tmp4 : tmp3, ++ sll_reg4 = is_backwards ? 
tmp4 : tmp1, ++ srl_reg4 = is_backwards ? tmp1 : tmp4; ++ assert_different_registers(t0, thirty_two, alsrc, shr, shl); ++ int unit = is_backwards ? -wordSize : wordSize; ++ int offset = is_backwards ? -wordSize : 0; ++ Label loop, done; ++ ++ __ mv(thirty_two, 32); ++ __ blt(count_in_bytes, thirty_two, done); ++ ++ __ bind(loop); ++ __ ld(tmp2, alsrc, unit); ++ __ sll(t0, sll_reg1, shl); ++ __ srl(tmp1, srl_reg1, shr); ++ __ orr(tmp1, tmp1, t0); ++ __ sd(tmp1, dst, offset); ++ ++ __ ld(tmp3, alsrc, unit * 2); ++ __ sll(t0, sll_reg2, shl); ++ __ srl(tmp2, srl_reg2, shr); ++ __ orr(tmp2, tmp2, t0); ++ __ sd(tmp2, dst, unit + offset); ++ ++ __ ld(tmp4, alsrc, unit * 3); ++ __ sll(t0, sll_reg3, shl); ++ __ srl(tmp3, srl_reg3, shr); ++ __ orr(tmp3, tmp3, t0); ++ __ sd(tmp3, dst, unit * 2 + offset); ++ ++ __ ld(tmp1, alsrc, unit * 4); ++ __ sll(t0, sll_reg4, shl); ++ __ srl(tmp4, srl_reg4, shr); ++ __ orr(tmp4, tmp4, t0); ++ __ sd(tmp4, dst, unit * 3 + offset); ++ ++ __ add(alsrc, alsrc, unit * 4); ++ __ add(dst, dst, unit * 4); ++ __ sub(count_in_bytes, count_in_bytes, 32); ++ __ bge(count_in_bytes, thirty_two, loop); ++ ++ __ bind(done); ++ } ++ ++ void dst_aligned_copy_8bytes_loop(Register alsrc, Register dst, ++ Register shr, Register shl, ++ Register count_in_bytes, Register eight, ++ bool is_backwards) { ++ const Register tmp1 = x13, tmp2 = x14, tmp3 = x15, tmp4 = x16; ++ const Register sll_reg = is_backwards ? tmp1 : tmp2, ++ srl_reg = is_backwards ? tmp2 : tmp1; ++ assert_different_registers(t0, eight, alsrc, shr, shl); ++ Label loop; ++ int unit = is_backwards ? -wordSize : wordSize; ++ ++ __ bind(loop); ++ __ ld(tmp2, alsrc, unit); ++ __ sll(t0, sll_reg, shl); ++ __ srl(tmp1, srl_reg, shr); ++ __ orr(t0, tmp1, t0); ++ __ sd(t0, dst, is_backwards ? unit : 0); ++ __ mv(tmp1, tmp2); ++ __ add(alsrc, alsrc, unit); ++ __ add(dst, dst, unit); ++ __ sub(count_in_bytes, count_in_bytes, 8); ++ __ bge(count_in_bytes, eight, loop); ++ } ++ ++ // Scan over array at a for count oops, verifying each one. ++ // Preserves a and count, clobbers t0 and t1. ++ void verify_oop_array(int size, Register a, Register count, Register temp) { ++ Label loop, end; ++ __ mv(t1, zr); ++ __ slli(t0, count, exact_log2(size)); ++ __ bind(loop); ++ __ bgeu(t1, t0, end); ++ ++ __ add(temp, a, t1); ++ if (size == wordSize) { ++ __ ld(temp, Address(temp, 0)); ++ __ verify_oop(temp); ++ } else { ++ __ lwu(temp, Address(temp, 0)); ++ __ decode_heap_oop(temp); // calls verify_oop ++ } ++ __ add(t1, t1, size); ++ __ j(loop); ++ __ bind(end); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
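++  //   The direction of the underlying copy_memory() call is selected by the sign
++  //   of its element-size argument, e.g. (sketch of the two call sites below):
++  //     copy_memory(aligned, s, d, count, t0,  size);   // disjoint: low -> high
++  //     copy_memory(aligned, s, d, count, t0, -size);   // conjoint: high -> low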
++  //
++  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address* entry,
++                                 const char* name, bool dest_uninitialized = false) {
++    const Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
++    RegSet saved_reg = RegSet::of(s, d, count);
++    __ align(CodeEntryAlignment);
++    StubCodeMark mark(this, "StubRoutines", name);
++    address start = __ pc();
++    __ enter();
++
++    if (entry != NULL) {
++      *entry = __ pc();
++      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
++      BLOCK_COMMENT("Entry:");
++    }
++
++    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
++    if (dest_uninitialized) {
++      decorators |= IS_DEST_UNINITIALIZED;
++    }
++    if (aligned) {
++      decorators |= ARRAYCOPY_ALIGNED;
++    }
++
++    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
++    bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_reg);
++
++    if (is_oop) {
++      // save regs before copy_memory
++      __ push_reg(RegSet::of(d, count), sp);
++    }
++    copy_memory(aligned, s, d, count, t0, checked_cast<int>(size));
++
++    if (is_oop) {
++      __ pop_reg(RegSet::of(d, count), sp);
++      if (VerifyOops) {
++        verify_oop_array(checked_cast<int>(size), d, count, t2);
++      }
++    }
++
++    bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_reg);
++
++    __ leave();
++    __ mv(x10, zr); // return 0
++    __ ret();
++    return start;
++  }
++
++  // Arguments:
++  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
++  //             ignored
++  //   is_oop  - true => oop array, so generate store check code
++  //   name    - stub name string
++  //
++  // Inputs:
++  //   c_rarg0 - source array address
++  //   c_rarg1 - destination array address
++  //   c_rarg2 - element count, treated as ssize_t, can be zero
++  //
++  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
++  // the hardware handle it. The two dwords within qwords that span
++  // cache line boundaries will still be loaded and stored atomically.
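++  //   The stub below falls back to the forward (disjoint) entry when the copy is
++  //   not destructively overlapping, i.e. roughly (sketch, unsigned arithmetic):
++  //     if ((uintptr_t)(d - s) >= (uintptr_t)count * size) goto nooverlap_target;
++  //     // otherwise copy backwards, from the highest address down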
++ // ++ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ RegSet saved_regs = RegSet::of(s, d, count); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); ++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ BLOCK_COMMENT("Entry:"); ++ } ++ ++ // use fwd copy when (d-s) above_equal (count*size) ++ __ sub(t0, d, s); ++ __ slli(t1, count, exact_log2(size)); ++ __ bgeu(t0, t1, nooverlap_target); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, s, d, count, saved_regs); ++ ++ if (is_oop) { ++ // save regs before copy_memory ++ __ push_reg(RegSet::of(d, count), sp); ++ } ++ ++ copy_memory(aligned, s, d, count, t0, -size); ++ if (is_oop) { ++ __ pop_reg(RegSet::of(d, count), sp); ++ if (VerifyOops) { ++ verify_oop_array(size, d, count, t2); ++ } ++ } ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, t0, saved_regs); ++ __ leave(); ++ __ mv(x10, zr); // return 0 ++ __ ret(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jbyte), aligned, not_oop, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jbyte), aligned, not_oop, nooverlap_target, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jshort), aligned, not_oop, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jshort), aligned, not_oop, nooverlap_target, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomically. 
++ // ++ address generate_conjoint_int_copy(bool aligned, address nooverlap_target, ++ address* entry, const char* name, ++ bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name); ++ } ++ ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_long_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized = false) { ++ const bool not_oop = false; ++ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_oop_copy(bool aligned, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as size_t, can be zero ++ // ++ address generate_conjoint_oop_copy(bool aligned, ++ address nooverlap_target, address* entry, ++ const char* name, bool dest_uninitialized) { ++ const bool is_oop = true; ++ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); ++ return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, ++ name, dest_uninitialized); ++ } ++ ++ // Helper for generating a dynamic type check. ++ // Smashes t0, t1. 
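++  // The fast/slow path pair it emits succeeds roughly when (a sketch):
++  //   sub_klass == super_klass
++  //   || *(Klass**)((address)sub_klass + super_check_offset) == super_klass
++  //   || super_klass is found in sub_klass->secondary_supers()   // slow path scan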
++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ BLOCK_COMMENT("type_check:"); ++ ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); ++ ++ // Fall through on failure! ++ __ BIND(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // c_rarg4 - oop ckval (super_klass) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char* name, address* entry, ++ bool dest_uninitialized = false) { ++ Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elementscount ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass ++ ++ RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4); ++ RegSet wb_post_saved_regs = RegSet::of(count); ++ ++ // Registers used as temps (x7, x9, x18 are save-on-entry) ++ const Register count_save = x19; // orig elementscount ++ const Register start_to = x18; // destination array start address ++ const Register copied_oop = x7; // actual oop copied ++ const Register r9_klass = x9; // oop._klass ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ assert_different_registers(from, to, count, ckoff, ckval, start_to, ++ copied_oop, r9_klass, count_save); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // Caller of this entry point must set up the argument registers ++ if (entry != NULL) { ++ *entry = __ pc(); ++ BLOCK_COMMENT("Entry:"); ++ } ++ ++ // Empty array: Nothing to do ++ __ beqz(count, L_done); ++ ++ __ push_reg(RegSet::of(x7, x9, x18, x19), sp); ++ ++#ifdef ASSERT ++ BLOCK_COMMENT("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. 
++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(start_to, Address(ckval, sco_offset)); ++ __ beq(ckoff, start_to, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ bool is_oop = true; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs); ++ ++ // save the original count ++ __ mv(count_save, count); ++ ++ // Copy from low to high addresses ++ __ mv(start_to, to); // Save destination array start address ++ __ j(L_load_element); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for count to 0 do ++ // copied_oop = load_heap_oop(from++) ++ // ... generate_type_check ... ++ // store_heap_oop(to++, copied_oop) ++ // end ++ ++ __ align(OptoLoopAlignment); ++ ++ __ BIND(L_store_element); ++ __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop ++ __ add(to, to, UseCompressedOops ? 4 : 8); ++ __ sub(count, count, 1); ++ __ beqz(count, L_do_card_marks); ++ ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop ++ __ add(from, from, UseCompressedOops ? 4 : 8); ++ __ beqz(copied_oop, L_store_element); ++ ++ __ load_klass(r9_klass, copied_oop);// query the object klass ++ generate_type_check(r9_klass, ckoff, ckval, L_store_element); ++ // ======== end loop ======== ++ ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register count = remaining oops, count_orig = total oops. ++ // Emit GC store barriers for the oops we have copied and report ++ // their number to the caller. ++ ++ __ sub(count, count_save, count); // K = partially copied oop count ++ __ xori(count, count, -1); // report (-1^K) to caller ++ __ beqz(count, L_done_pop); ++ ++ __ BIND(L_do_card_marks); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, count_save, t0, wb_post_saved_regs); ++ ++ __ bind(L_done_pop); ++ __ pop_reg(RegSet::of(x7, x9, x18, x19), sp); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); ++ ++ __ bind(L_done); ++ __ mv(x10, count); ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. ++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ BLOCK_COMMENT("arraycopy_range_checks:"); ++ ++ assert_different_registers(t0, temp); ++ ++ // if [src_pos + length > arrayOop(src)->length()] then FAIL ++ __ lwu(t0, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, src_pos); ++ __ bgtu(temp, t0, L_failed); ++ ++ // if [dst_pos + length > arrayOop(dst)->length()] then FAIL ++ __ lwu(t0, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ addw(temp, length, dst_pos); ++ __ bgtu(temp, t0, L_failed); ++ ++ // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'. 
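++    // The two checks above and the zero-extensions below amount to (a sketch,
++    // 32-bit unsigned compares; the positions arrive as jints whose upper
++    // 32 bits are not guaranteed to be clean):
++    //   if ((uint32_t)(src_pos + length) > (uint32_t)src->length()) goto L_failed;
++    //   if ((uint32_t)(dst_pos + length) > (uint32_t)dst->length()) goto L_failed;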
++ __ zero_extend(src_pos, src_pos, 32); ++ __ zero_extend(dst_pos, dst_pos, 32); ++ ++ BLOCK_COMMENT("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. ++ // ++ address generate_unsafe_copy(const char* name, ++ address byte_copy_entry, ++ address short_copy_entry, ++ address int_copy_entry, ++ address long_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && long_copy_entry != NULL); ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ const Register s = c_rarg0, d = c_rarg1, count = c_rarg2; ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); ++ ++ __ orr(t0, s, d); ++ __ orr(t0, t0, count); ++ ++ __ andi(t0, t0, BytesPerLong - 1); ++ __ beqz(t0, L_long_aligned); ++ __ andi(t0, t0, BytesPerInt - 1); ++ __ beqz(t0, L_int_aligned); ++ __ andi(t0, t0, 1); ++ __ beqz(t0, L_short_aligned); ++ __ j(RuntimeAddress(byte_copy_entry)); ++ ++ __ BIND(L_short_aligned); ++ __ srli(count, count, LogBytesPerShort); // size => short_count ++ __ j(RuntimeAddress(short_copy_entry)); ++ __ BIND(L_int_aligned); ++ __ srli(count, count, LogBytesPerInt); // size => int_count ++ __ j(RuntimeAddress(int_copy_entry)); ++ __ BIND(L_long_aligned); ++ __ srli(count, count, LogBytesPerLong); // size => long_count ++ __ j(RuntimeAddress(long_copy_entry)); ++ ++ return start; ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // c_rarg4 - element count (32-bits) ++ // ++ // Output: ++ // x10 == 0 - success ++ // x10 == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char* name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ assert_cond(byte_copy_entry != NULL && short_copy_entry != NULL && ++ int_copy_entry != NULL && oop_copy_entry != NULL && ++ long_copy_entry != NULL && checkcast_copy_entry != NULL); ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; ++ ++ __ align(CodeEntryAlignment); ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Registers used as temps ++ const Register dst_klass = c_rarg5; ++ ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); ++ ++ 
//----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. ++ // ++ ++ // if [src == NULL] then return -1 ++ __ beqz(src, L_failed); ++ ++ // if [src_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, src_pos, 1UL << 31); ++ __ bnez(t0, L_failed); ++ ++ // if [dst == NULL] then return -1 ++ __ beqz(dst, L_failed); ++ ++ // if [dst_pos < 0] then return -1 ++ // i.e. sign bit set ++ __ andi(t0, dst_pos, 1UL << 31); ++ __ bnez(t0, L_failed); ++ ++ // registers used as temp ++ const Register scratch_length = x28; // elements count to copy ++ const Register scratch_src_klass = x29; // array klass ++ const Register lh = x30; // layout helper ++ ++ // if [length < 0] then return -1 ++ __ addw(scratch_length, length, zr); // length (elements count, 32-bits value) ++ // i.e. sign bit set ++ __ andi(t0, scratch_length, 1UL << 31); ++ __ bnez(t0, L_failed); ++ ++ __ load_klass(scratch_src_klass, src); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ bnez(scratch_src_klass, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(t0, dst); ++ __ beqz(t0, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ lw(lh, Address(scratch_src_klass, lh_offset)); ++ __ mvw(t0, objArray_lh); ++ __ beq(lh, t0, L_objArray); ++ ++ // if [src->klass() != dst->klass()] then return -1 ++ __ load_klass(t1, dst); ++ __ bne(t1, scratch_src_klass, L_failed); ++ ++ // if [src->is_Array() != NULL] then return -1 ++ // i.e. (lh >= 0) ++ __ andi(t0, lh, 1UL << 31); ++ __ beqz(t0, L_failed); ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++ __ mvw(t1, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift); ++ __ bge(lh, t1, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize) ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize) ++ // ++ ++ const Register t0_offset = t0; // array offset ++ const Register x22_elsize = lh; // element size ++ ++ // Get array_header_in_bytes() ++ int lh_header_size_width = exact_log2(Klass::_lh_header_size_mask + 1); ++ int lh_header_size_msb = Klass::_lh_header_size_shift + lh_header_size_width; ++ __ slli(t0_offset, lh, XLEN - lh_header_size_msb); // left shift to remove 24 ~ 32; ++ __ srli(t0_offset, t0_offset, XLEN - lh_header_size_width); // array_offset ++ ++ __ add(src, src, t0_offset); // src array offset ++ __ add(dst, dst, t0_offset); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ assert(Klass::_lh_log2_element_size_shift == 0, "fix this code"); ++ ++ // The possible values of elsize are 0-3, i.e. exact_log2(element ++ // size in bytes). We do a simple bitwise binary search. 
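++    // i.e. (sketch of the dispatch below):
++    //   if (elsize & 2) { if (elsize & 1) goto L_copy_longs;  else goto L_copy_ints;  }
++    //   else            { if (elsize & 1) goto L_copy_shorts; else goto L_copy_bytes; }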
++ __ BIND(L_copy_bytes); ++ __ andi(t0, x22_elsize, 2); ++ __ bnez(t0, L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_shorts); ++ __ add(from, src, src_pos); // src_addr ++ __ add(to, dst, dst_pos); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(byte_copy_entry)); ++ ++ __ BIND(L_copy_shorts); ++ __ shadd(from, src_pos, src, t0, 1); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 1); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(short_copy_entry)); ++ ++ __ BIND(L_copy_ints); ++ __ andi(t0, x22_elsize, 1); ++ __ bnez(t0, L_copy_longs); ++ __ shadd(from, src_pos, src, t0, 2); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 2); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(int_copy_entry)); ++ ++ __ BIND(L_copy_longs); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ andi(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> x22_elsize ++ __ addw(lh, lh, zr); ++ __ mvw(t0, LogBytesPerLong); ++ __ beq(x22_elsize, t0, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); ++ } ++#endif ++ __ shadd(from, src_pos, src, t0, 3); // src_addr ++ __ shadd(to, dst_pos, dst, t0, 3); // dst_addr ++ __ addw(count, scratch_length, zr); // length ++ __ j(RuntimeAddress(long_copy_entry)); ++ ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(t2, dst); ++ __ bne(scratch_src_klass, t2, L_checkcast_copy); // usual case is exact equality ++ ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t1, L_failed); ++ ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, scratch_length, zr); // length ++ __ BIND(L_plain_copy); ++ __ j(RuntimeAddress(oop_copy_entry)); ++ ++ __ BIND(L_checkcast_copy); ++ // live at this point: scratch_src_klass, scratch_length, t2 (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ lwu(t0, Address(t2, lh_offset)); ++ __ mvw(t1, objArray_lh); ++ __ bne(t0, t1, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length, ++ t2, L_failed); ++ ++ __ load_klass(dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ shadd(from, src_pos, src, t0, LogBytesPerHeapOop); ++ __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ shadd(to, dst_pos, dst, t0, LogBytesPerHeapOop); ++ __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ __ addw(count, length, zr); // length (reloaded) ++ const Register sco_temp = c_rarg3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, ++ dst_klass, scratch_src_klass); ++ ++ // Generate the type check. 
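++      // Sketch of what follows: first test
++      //   if (src_array_klass->is_subtype_of(dst_array_klass)) goto L_plain_copy;
++      // and otherwise set up the element-wise checkcast copy with
++      //   ckoff (c_rarg3) = dst_elem_klass->super_check_offset();
++      //   ckval (c_rarg4) = dst_elem_klass;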
++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // Smashes t0, t1 ++ generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ld(dst_klass, Address(dst_klass, ek_offset)); ++ __ lwu(sco_temp, Address(dst_klass, sco_offset)); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(c_rarg3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_copy_entry. ++ __ mv(c_rarg4, dst_klass); // dst.klass.element_klass ++ __ j(RuntimeAddress(checkcast_copy_entry)); ++ } ++ ++ __ BIND(L_failed); ++ __ mv(x10, -1); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char* name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ BLOCK_COMMENT("Entry:"); ++ ++ const Register to = c_rarg0; // source array address ++ const Register value = c_rarg1; // value ++ const Register count = c_rarg2; // elements count ++ ++ const Register bz_base = x28; // base for block_zero routine ++ const Register cnt_words = x29; // temp register ++ const Register tmp_reg = t1; ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ ++ // Zero extend value ++ // 8 bit -> 16 bit ++ __ andi(value, value, 0xff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 8); ++ __ orr(value, value, tmp_reg); ++ ++ // 16 bit -> 32 bit ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); ++ ++ __ mv(tmp_reg, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_SHORT: ++ shift = 1; ++ // Zero extend value ++ // 16 bit -> 32 bit ++ __ andi(value, value, 0xffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 16); ++ __ orr(value, value, tmp_reg); ++ ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ case T_INT: ++ shift = 2; ++ ++ // Short arrays (< 8 bytes) fill by element ++ __ mv(tmp_reg, 8 >> shift); ++ __ bltu(count, tmp_reg, L_fill_elements); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(t0, to, 1); ++ __ beqz(t0, L_skip_align1); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ addiw(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(t0, to, 2); ++ __ beqz(t0, L_skip_align2); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ addiw(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. 
++ __ andi(t0, to, 4); ++ __ beqz(t0, L_skip_align4); ++ __ sw(value, Address(to, 0)); ++ __ addi(to, to, 4); ++ __ addiw(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srliw(cnt_words, count, 3 - shift); // number of words ++ ++ // 32 bit -> 64 bit ++ __ andi(value, value, 0xffffffff); ++ __ mv(tmp_reg, value); ++ __ slli(tmp_reg, tmp_reg, 32); ++ __ orr(value, value, tmp_reg); ++ ++ __ slli(tmp_reg, cnt_words, 3 - shift); ++ __ subw(count, count, tmp_reg); ++ { ++ __ fill_words(to, cnt_words, value); ++ } ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. ++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ __ beqz(count, L_exit1); ++ __ shadd(to, count, to, tmp_reg, shift); // points to the end ++ __ sd(value, Address(to, -8)); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ ret(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_2); ++ __ sb(value, Address(to, 0)); ++ __ addi(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 4); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_SHORT: ++ __ andi(t0, count, 1); ++ __ beqz(t0, L_fill_4); ++ __ sh(value, Address(to, 0)); ++ __ addi(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(t0, count, 2); ++ __ beqz(t0, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ case T_INT: ++ __ beqz(count, L_exit2); ++ __ sw(value, Address(to, 0)); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ address entry = NULL; ++ address entry_jbyte_arraycopy = NULL; ++ address entry_jshort_arraycopy = NULL; ++ address entry_jint_arraycopy = NULL; ++ address entry_oop_arraycopy = NULL; ++ address entry_jlong_arraycopy = NULL; ++ address entry_checkcast_arraycopy = NULL; ++ ++ StubRoutines::riscv::_zero_blocks = generate_zero_blocks(); ++ ++ //*** jbyte ++ // Always need aligned and unaligned versions ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, ++ "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, ++ &entry_jbyte_arraycopy, ++ "jbyte_arraycopy"); ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry, ++ "arrayof_jbyte_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL, ++ "arrayof_jbyte_arraycopy"); ++ ++ //*** jshort ++ // Always need aligned and unaligned versions ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, ++ "jshort_disjoint_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, ++ &entry_jshort_arraycopy, ++ "jshort_arraycopy"); ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry, ++ "arrayof_jshort_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL, ++ "arrayof_jshort_arraycopy"); ++ ++ //*** jint ++ // Aligned versions ++ 
StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry, ++ "arrayof_jint_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy, ++ "arrayof_jint_arraycopy"); ++ // In 64 bit we need both aligned and unaligned versions of jint arraycopy. ++ // entry_jint_arraycopy always points to the unaligned version ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry, ++ "jint_disjoint_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry, ++ &entry_jint_arraycopy, ++ "jint_arraycopy"); ++ ++ //*** jlong ++ // It is always aligned ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry, ++ "arrayof_jlong_disjoint_arraycopy"); ++ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy, ++ "arrayof_jlong_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; ++ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; ++ ++ //*** oops ++ { ++ // With compressed oops we need unaligned versions; notice that ++ // we overwrite entry_oop_arraycopy. ++ bool aligned = !UseCompressedOops; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy", ++ /*dest_uninitialized*/false); ++ StubRoutines::_arrayof_oop_arraycopy ++ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy", ++ /*dest_uninitialized*/false); ++ // Aligned versions without pre-barriers ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit ++ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_arrayof_oop_arraycopy_uninit ++ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ } ++ ++ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; ++ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; ++ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit; ++ ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, ++ /*dest_uninitialized*/true); ++ ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_jlong_arraycopy); ++ ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_oop_arraycopy, ++ entry_jlong_arraycopy, ++ entry_checkcast_arraycopy); ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, 
true, "arrayof_jint_fill"); ++ } ++ ++ // Safefetch stubs. ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue) ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue) ++ // ++ // arguments: ++ // c_rarg0 = adr ++ // c_rarg1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ assert_cond(entry != NULL && fault_pc != NULL && continuation_pc != NULL); ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into c_rarg1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ case 8: ++ // int64_t ++ __ ld(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ mv(x10, c_rarg1); ++ __ ret(); ++ } ++ ++#ifdef COMPILER2 ++ // code for comparing 16 bytes of strings with same encoding ++ void compare_string_16_bytes_same(Label& DIFF1, Label& DIFF2) { ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; ++ __ ld(tmp5, Address(str1)); ++ __ addi(str1, str1, wordSize); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ ld(cnt1, Address(str2)); ++ __ addi(str2, str2, wordSize); ++ __ bnez(tmp4, DIFF1); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, wordSize); ++ __ xorr(tmp4, tmp5, cnt1); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, wordSize); ++ __ bnez(tmp4, DIFF2); ++ } ++ ++ // code for comparing 8 characters of strings with Latin1 and Utf16 encoding ++ void compare_string_8_x_LU(Register tmpL, Register tmpU, Register strL, Register strU, Label& DIFF) { ++ const Register tmp = x30; ++ __ ld(tmpL, Address(strL)); ++ __ addi(strL, strL, wordSize); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, wordSize); ++ __ inflate_lo32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, tmpU, t0); ++ __ bnez(tmp, DIFF); ++ ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, wordSize); ++ __ inflate_hi32(tmp, tmpL); ++ __ mv(t0, tmp); ++ __ xorr(tmp, tmpU, t0); ++ __ bnez(tmp, DIFF); ++ } ++ ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ address generate_compare_long_string_different_encoding(bool isLU) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLU ? "compare_long_string_different_encoding LU" : "compare_long_string_different_encoding UL"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF, DONE, CALCULATE_DIFFERENCE; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ ++ // cnt2 == amount of characters left to compare ++ // Check already loaded first 4 symbols ++ __ inflate_lo32(tmp3, isLU ? tmp1 : tmp2); ++ __ mv(isLU ? tmp1 : tmp2, tmp3); ++ __ addi(str1, str1, isLU ? wordSize / 2 : wordSize); ++ __ addi(str2, str2, isLU ? wordSize : wordSize / 2); ++ __ sub(cnt2, cnt2, wordSize / 2); // Already loaded 4 symbols. ++ __ push_reg(spilled_regs, sp); ++ ++ __ xorr(tmp3, tmp1, tmp2); ++ __ mv(tmp5, tmp2); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++ ++ Register strU = isLU ? str2 : str1, ++ strL = isLU ? str1 : str2, ++ tmpU = isLU ? 
tmp5 : tmp1, // where to keep U for comparison ++ tmpL = isLU ? tmp1 : tmp5; // where to keep L for comparison ++ ++ // make sure main loop is byte-aligned, we should load another 4 bytes from strL ++ __ beqz(cnt2, DONE); // no characters left ++ __ lwu(tmpL, Address(strL)); ++ __ addi(strL, strL, wordSize / 2); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, wordSize); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++ __ addi(cnt2, cnt2, -wordSize / 2); ++ ++ __ beqz(cnt2, DONE); // no character left ++ __ sub(cnt2, cnt2, wordSize * 2); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); // smaller loop ++ __ sub(cnt2, cnt2, wordSize * 2); ++ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ addi(t0, cnt2, wordSize * 2); ++ __ beqz(t0, DONE); ++ __ bind(TAIL); // 1..15 characters left ++ if (AvoidUnalignedAccesses) { ++ // Aligned access. Load bytes from byte-aligned address, ++ // which may contain invalid bytes in last load. ++ // Invalid bytes should be removed before comparison. ++ Label LOAD_LAST, WORD_CMP; ++ __ addi(t0, cnt2, wordSize); ++ __ bgtz(t0, LOAD_LAST); ++ // remaining characters is greater than or equals to 8, we can do one compare_string_8_x_LU ++ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ addi(cnt2, cnt2, wordSize); ++ __ beqz(cnt2, DONE); // no character left ++ __ bind(LOAD_LAST); // 1..7 characters left ++ __ lwu(tmpL, Address(strL)); ++ __ addi(strL, strL, wordSize / 2); ++ __ ld(tmpU, Address(strU)); ++ __ addi(strU, strU, wordSize); ++ __ inflate_lo32(tmp3, tmpL); ++ __ mv(tmpL, tmp3); ++ __ addi(t0, cnt2, wordSize / 2); ++ __ blez(t0, WORD_CMP); ++ __ slli(t0, t0, 1); // now in bytes ++ __ slli(t0, t0, LogBitsPerByte); ++ __ sll(tmpL, tmpL, t0); ++ __ sll(tmpU, tmpU, t0); ++ // remaining characters is greater than or equals to 4, we can do one full 4-byte comparison ++ __ bind(WORD_CMP); ++ __ xorr(tmp3, tmpU, tmpL); ++ __ bnez(tmp3, CALCULATE_DIFFERENCE); ++ __ addi(cnt2, cnt2, wordSize / 2); ++ __ bltz(cnt2, LOAD_LAST); // 1..3 characters left ++ __ j(DONE); // no character left ++ } else { ++ // Unaligned accesses. Load from non-byte aligned address. ++ __ shadd(strU, cnt2, strU, t0, 1); // convert cnt2 into bytes and get Address of last 8 bytes in UTF-16 string ++ __ add(strL, strL, cnt2); // Address of last 16 bytes in Latin1 string ++ // last 16 characters ++ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ compare_string_8_x_LU(tmpL, tmpU, strL, strU, DIFF); ++ __ j(DONE); ++ } ++ __ bind(DIFF); ++ __ mv(tmpL, t0); ++ // Find the first different characters in the longwords and ++ // compute their difference. ++ __ bind(CALCULATE_DIFFERENCE); ++ __ ctzc_bit(tmp4, tmp3); ++ __ srl(tmp1, tmp1, tmp4); ++ __ srl(tmp5, tmp5, tmp4); ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ sub(result, tmp1, tmp5); ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; ++ } ++ ++ // x10 = result ++ // x11 = str1 ++ // x12 = cnt1 ++ // x13 = str2 ++ // x14 = cnt2 ++ // x28 = tmp1 ++ // x29 = tmp2 ++ // x30 = tmp3 ++ // x31 = tmp4 ++ address generate_compare_long_string_same_encoding(bool isLL) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isLL ? 
++ "compare_long_string_same_encoding LL" : "compare_long_string_same_encoding UU"); ++ address entry = __ pc(); ++ Label SMALL_LOOP, CHECK_LAST, DIFF2, TAIL, ++ LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF; ++ const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, cnt2 = x14, ++ tmp1 = x28, tmp2 = x29, tmp3 = x30, tmp4 = x7, tmp5 = x31; ++ RegSet spilled_regs = RegSet::of(tmp4, tmp5); ++ ++ // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used ++ // update cnt2 counter with already loaded 8 bytes ++ __ sub(cnt2, cnt2, wordSize / (isLL ? 1 : 2)); ++ // update pointers, because of previous read ++ __ add(str1, str1, wordSize); ++ __ add(str2, str2, wordSize); ++ // less than 16 bytes left? ++ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ push_reg(spilled_regs, sp); ++ __ bltz(cnt2, TAIL); ++ __ bind(SMALL_LOOP); ++ compare_string_16_bytes_same(DIFF, DIFF2); ++ __ sub(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ bgez(cnt2, SMALL_LOOP); ++ __ bind(TAIL); ++ __ addi(cnt2, cnt2, isLL ? 2 * wordSize : wordSize); ++ __ beqz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); ++ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ blez(cnt2, CHECK_LAST); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ ld(tmp1, Address(str1)); ++ __ addi(str1, str1, wordSize); ++ __ ld(tmp2, Address(str2)); ++ __ addi(str2, str2, wordSize); ++ __ sub(cnt2, cnt2, isLL ? wordSize : wordSize / 2); ++ __ bind(CHECK_LAST); ++ if (!isLL) { ++ __ add(cnt2, cnt2, cnt2); // now in bytes ++ } ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ if (AvoidUnalignedAccesses) { ++ // Aligned access. Load bytes from byte-aligned address, ++ // which may contain invalid bytes in last load. ++ // Invalid bytes should be removed before comparison. ++ __ ld(tmp5, Address(str1)); ++ __ ld(cnt1, Address(str2)); ++ __ neg(cnt2, cnt2); ++ __ slli(cnt2, cnt2, LogBitsPerByte); ++ __ sll(tmp5, tmp5, cnt2); ++ __ sll(cnt1, cnt1, cnt2); ++ } else { ++ // Unaligned access. Load from non-byte aligned address. ++ __ add(str1, str1, cnt2); ++ __ ld(tmp5, Address(str1)); ++ __ add(str2, str2, cnt2); ++ __ ld(cnt1, Address(str2)); ++ } ++ ++ __ xorr(tmp4, tmp5, cnt1); ++ __ beqz(tmp4, LENGTH_DIFF); ++ // Find the first different characters in the longwords and ++ // compute their difference. 
++ __ bind(DIFF2); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp5, tmp5, tmp3); ++ __ srl(cnt1, cnt1, tmp3); ++ if (isLL) { ++ __ andi(tmp5, tmp5, 0xFF); ++ __ andi(cnt1, cnt1, 0xFF); ++ } else { ++ __ andi(tmp5, tmp5, 0xFFFF); ++ __ andi(cnt1, cnt1, 0xFFFF); ++ } ++ __ sub(result, tmp5, cnt1); ++ __ j(LENGTH_DIFF); ++ __ bind(DIFF); ++ __ ctzc_bit(tmp3, tmp4, isLL); // count zero from lsb to msb ++ __ srl(tmp1, tmp1, tmp3); ++ __ srl(tmp2, tmp2, tmp3); ++ if (isLL) { ++ __ andi(tmp1, tmp1, 0xFF); ++ __ andi(tmp2, tmp2, 0xFF); ++ } else { ++ __ andi(tmp1, tmp1, 0xFFFF); ++ __ andi(tmp2, tmp2, 0xFFFF); ++ } ++ __ sub(result, tmp1, tmp2); ++ __ j(LENGTH_DIFF); ++ __ bind(LAST_CHECK_AND_LENGTH_DIFF); ++ __ xorr(tmp4, tmp1, tmp2); ++ __ bnez(tmp4, DIFF); ++ __ bind(LENGTH_DIFF); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; ++ } ++ ++ void generate_compare_long_strings() { ++ StubRoutines::riscv::_compare_long_string_LL = generate_compare_long_string_same_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UU = generate_compare_long_string_same_encoding(false); ++ StubRoutines::riscv::_compare_long_string_LU = generate_compare_long_string_different_encoding(true); ++ StubRoutines::riscv::_compare_long_string_UL = generate_compare_long_string_different_encoding(false); ++ } ++ ++ // x10 result ++ // x11 src ++ // x12 src count ++ // x13 pattern ++ // x14 pattern count ++ address generate_string_indexof_linear(bool needle_isL, bool haystack_isL) ++ { ++ const char* stubName = needle_isL ++ ? (haystack_isL ? "indexof_linear_ll" : "indexof_linear_ul") ++ : "indexof_linear_uu"; ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stubName); ++ address entry = __ pc(); ++ ++ int needle_chr_size = needle_isL ? 1 : 2; ++ int haystack_chr_size = haystack_isL ? 1 : 2; ++ int needle_chr_shift = needle_isL ? 0 : 1; ++ int haystack_chr_shift = haystack_isL ? 0 : 1; ++ bool isL = needle_isL && haystack_isL; ++ // parameters ++ Register result = x10, haystack = x11, haystack_len = x12, needle = x13, needle_len = x14; ++ // temporary registers ++ Register mask1 = x20, match_mask = x21, first = x22, trailing_zero = x23, mask2 = x24, tmp = x25; ++ // redefinitions ++ Register ch1 = x28, ch2 = x29; ++ RegSet spilled_regs = RegSet::range(x20, x25) + RegSet::range(x28, x29); ++ ++ __ push_reg(spilled_regs, sp); ++ ++ Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO, ++ L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED, ++ L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP, ++ L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH, ++ L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2, ++ L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH; ++ ++ __ ld(ch1, Address(needle)); ++ __ ld(ch2, Address(haystack)); ++ // src.length - pattern.length ++ __ sub(haystack_len, haystack_len, needle_len); ++ ++ // first is needle[0] ++ __ andi(first, ch1, needle_isL ? 0xFF : 0xFFFF, first); ++ __ mv(mask1, haystack_isL ? 0x0101010101010101 : 0x0001000100010001); ++ __ mul(first, first, mask1); ++ __ mv(mask2, haystack_isL ? 
0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff); ++ if (needle_isL != haystack_isL) { ++ __ mv(tmp, ch1); ++ } ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size - 1); ++ __ blez(haystack_len, L_SMALL); ++ ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ } ++ // xorr, sub, orr, notr, andr ++ // compare and set match_mask[i] with 0x80/0x8000 (Latin1/UTF16) if ch2[i] == first[i] ++ // eg: ++ // first: aa aa aa aa aa aa aa aa ++ // ch2: aa aa li nx jd ka aa aa ++ // match_mask: 80 80 00 00 00 00 80 80 ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ ++ // search first char of needle, if success, goto L_HAS_ZERO; ++ __ bnez(match_mask, L_HAS_ZERO); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ bltz(haystack_len, L_POST_LOOP); ++ ++ __ bind(L_LOOP); ++ __ ld(ch2, Address(haystack)); ++ __ compute_match_mask(ch2, first, match_mask, mask1, mask2); ++ __ bnez(match_mask, L_HAS_ZERO); ++ ++ __ bind(L_LOOP_PROCEED); ++ __ sub(haystack_len, haystack_len, wordSize / haystack_chr_size); ++ __ add(haystack, haystack, wordSize); ++ __ add(result, result, wordSize / haystack_chr_size); ++ __ bgez(haystack_len, L_LOOP); ++ ++ __ bind(L_POST_LOOP); ++ __ mv(ch2, -wordSize / haystack_chr_size); ++ __ ble(haystack_len, ch2, NOMATCH); // no extra characters to check ++ __ ld(ch2, Address(haystack)); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zero, -1); // all bits set ++ __ j(L_SMALL_PROCEED); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL); ++ __ slli(haystack_len, haystack_len, LogBitsPerByte + haystack_chr_shift); ++ __ neg(haystack_len, haystack_len); ++ if (needle_isL != haystack_isL) { ++ __ inflate_lo32(ch1, tmp, match_mask, trailing_zero); ++ } ++ __ xorr(ch2, first, ch2); ++ __ sub(match_mask, ch2, mask1); ++ __ orr(ch2, ch2, mask2); ++ __ mv(trailing_zero, -1); // all bits set ++ ++ __ bind(L_SMALL_PROCEED); ++ __ srl(trailing_zero, trailing_zero, haystack_len); // mask. zeroes on useless bits. ++ __ notr(ch2, ch2); ++ __ andr(match_mask, match_mask, ch2); ++ __ andr(match_mask, match_mask, trailing_zero); // clear useless bits and check ++ __ beqz(match_mask, NOMATCH); ++ ++ __ bind(L_SMALL_HAS_ZERO_LOOP); ++ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, ch2, tmp); // count trailing zeros ++ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ mv(ch2, wordSize / haystack_chr_size); ++ __ ble(needle_len, ch2, L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ ++ __ bind(L_SMALL_CMP_LOOP); ++ __ shadd(first, trailing_zero, needle, first, needle_chr_shift); ++ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ needle_isL ? __ lbu(first, Address(first)) : __ lhu(first, Address(first)); ++ haystack_isL ? 
__ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zero, trailing_zero, 1); ++ __ bge(trailing_zero, needle_len, L_SMALL_CMP_LOOP_LAST_CMP); ++ __ beq(first, ch2, L_SMALL_CMP_LOOP); ++ ++ __ bind(L_SMALL_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, NOMATCH); ++ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ add(result, result, 1); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_SMALL_HAS_ZERO_LOOP); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP); ++ __ bne(first, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_SMALL_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ bne(ch1, ch2, L_SMALL_CMP_LOOP_NOMATCH); ++ __ j(DONE); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO); ++ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, tmp, ch2); ++ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ slli(needle_len, needle_len, BitsPerByte * wordSize / 2); ++ __ orr(haystack_len, haystack_len, needle_len); // restore needle_len(32bits) ++ __ sub(result, result, 1); // array index from 0, so result -= 1 ++ ++ __ bind(L_HAS_ZERO_LOOP); ++ __ mv(needle_len, wordSize / haystack_chr_size); ++ __ srli(ch2, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(needle_len, ch2, L_CMP_LOOP_LAST_CMP2); ++ // load next 8 bytes from haystack, and increase result index ++ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ mv(trailing_zero, wordSize / haystack_chr_size); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ ++ // compare one char ++ __ bind(L_CMP_LOOP); ++ __ shadd(needle_len, trailing_zero, needle, needle_len, needle_chr_shift); ++ needle_isL ? __ lbu(needle_len, Address(needle_len)) : __ lhu(needle_len, Address(needle_len)); ++ __ shadd(ch2, trailing_zero, haystack, ch2, haystack_chr_shift); ++ haystack_isL ? __ lbu(ch2, Address(ch2)) : __ lhu(ch2, Address(ch2)); ++ __ add(trailing_zero, trailing_zero, 1); // next char index ++ __ srli(tmp, haystack_len, BitsPerByte * wordSize / 2); ++ __ bge(trailing_zero, tmp, L_CMP_LOOP_LAST_CMP); ++ __ beq(needle_len, ch2, L_CMP_LOOP); ++ ++ __ bind(L_CMP_LOOP_NOMATCH); ++ __ beqz(match_mask, L_HAS_ZERO_LOOP_NOMATCH); ++ __ ctzc_bit(trailing_zero, match_mask, haystack_isL, needle_len, ch2); // find next "first" char index ++ __ addi(trailing_zero, trailing_zero, haystack_isL ? 7 : 15); ++ __ add(haystack, haystack, haystack_chr_size); ++ __ j(L_HAS_ZERO_LOOP); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP); ++ __ bne(needle_len, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_CMP_LOOP_LAST_CMP2); ++ __ compute_index(haystack, trailing_zero, match_mask, result, ch2, tmp, haystack_isL); ++ __ add(result, result, 1); ++ __ bne(ch1, ch2, L_CMP_LOOP_NOMATCH); ++ __ j(DONE); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(L_HAS_ZERO_LOOP_NOMATCH); ++ // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until ++ // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP, ++ // so, result was increased at max by wordSize/str2_chr_size - 1, so, ++ // respective high bit wasn't changed. L_LOOP_PROCEED will increase ++ // result by analyzed characters value, so, we can just reset lower bits ++ // in result here. 
Clear 2 lower bits for UU/UL and 3 bits for LL ++ // 2) restore needle_len and haystack_len values from "compressed" haystack_len ++ // 3) advance haystack value to represent next haystack octet. result & 7/3 is ++ // index of last analyzed substring inside current octet. So, haystack in at ++ // respective start address. We need to advance it to next octet ++ __ andi(match_mask, result, wordSize / haystack_chr_size - 1); ++ __ srli(needle_len, haystack_len, BitsPerByte * wordSize / 2); ++ __ andi(result, result, haystack_isL ? -8 : -4); ++ __ slli(tmp, match_mask, haystack_chr_shift); ++ __ sub(haystack, haystack, tmp); ++ __ addw(haystack_len, haystack_len, zr); ++ __ j(L_LOOP_PROCEED); ++ ++ __ align(OptoLoopAlignment); ++ __ bind(NOMATCH); ++ __ mv(result, -1); ++ ++ __ bind(DONE); ++ __ pop_reg(spilled_regs, sp); ++ __ ret(); ++ return entry; ++ } ++ ++ void generate_string_indexof_stubs() ++ { ++ StubRoutines::riscv::_string_indexof_linear_ll = generate_string_indexof_linear(true, true); ++ StubRoutines::riscv::_string_indexof_linear_uu = generate_string_indexof_linear(false, false); ++ StubRoutines::riscv::_string_indexof_linear_ul = generate_string_indexof_linear(true, false); ++ } ++ ++ address generate_mulAdd() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address start = __ pc(); ++ ++ const Register out = x10; ++ const Register in = x11; ++ const Register offset = x12; ++ const Register len = x13; ++ const Register k = x14; ++ const Register tmp1 = x28; ++ const Register tmp2 = x29; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ mul_add(out, in, offset, len, k, tmp1, tmp2); ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - x address ++ * c_rarg1 - x length ++ * c_rarg2 - y address ++ * c_rarg3 - y lenth ++ * c_rarg4 - z address ++ * c_rarg5 - z length ++ */ ++ address generate_multiplyToLen() ++ { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); ++ address start = __ pc(); ++ ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register y = x12; ++ const Register ylen = x13; ++ const Register z = x14; ++ const Register zlen = x15; ++ ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; ++ ++ RegSet spilled_regs = RegSet::of(tmp1, tmp2); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ __ push_reg(spilled_regs, sp); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ pop_reg(spilled_regs, sp); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ ++ return start; ++ } ++ ++ address generate_squareToLen() ++ { ++ // squareToLen algorithm for sizes 1..127 described in java code works ++ // faster than multiply_to_len on some CPUs and slower on others, but ++ // multiply_to_len shows a bit better overall results ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "squareToLen"); ++ address start = __ pc(); ++ ++ const Register x = x10; ++ const Register xlen = x11; ++ const Register z = x12; ++ const Register zlen = x13; ++ const Register y = x14; // == x ++ const Register ylen = x15; // == xlen ++ ++ const Register tmp1 = x16; ++ const Register tmp2 = x17; ++ const Register tmp3 = x7; ++ const 
Register tmp4 = x28; ++ const Register tmp5 = x29; ++ const Register tmp6 = x30; ++ const Register tmp7 = x31; ++ ++ RegSet spilled_regs = RegSet::of(y, tmp2); ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ __ push_reg(spilled_regs, sp); ++ __ mv(y, x); ++ __ mv(ylen, xlen); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ++ __ pop_reg(spilled_regs, sp); ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++#endif // COMPILER2 ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. 
riscv asserts that frame::arg_reg_save_area_bytes == 0 ++ assert_cond(runtime_entry != NULL); ++ enum layout { ++ fp_off = 0, ++ fp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ const int insts_size = 512; ++ const int locs_size = 64; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ assert_cond(oop_maps != NULL && masm != NULL); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ ++ __ enter(); // Save FP and RA before call ++ ++ assert(is_even(framesize / 2), "sp not 16-byte aligned"); ++ ++ // ra and fp are already in place ++ __ addi(sp, fp, 0 - ((unsigned)framesize << LogBytesPerInt)); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(sp, fp, the_pc, t0); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ mv(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ mv(c_rarg2, arg2); ++ } ++ __ mv(c_rarg0, xthread); ++ BLOCK_COMMENT("call runtime_entry"); ++ int32_t offset = 0; ++ __ movptr_with_offset(t0, runtime_entry, offset); ++ __ jalr(x1, t0, offset); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ assert_cond(map != NULL); ++ ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ __ leave(); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ bnez(t0, L); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif // ASSERT ++ __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ assert(stub != NULL, "create runtime stub fail!"); ++ return stub->entry_point(); ++ } ++ ++#ifdef COMPILER2 ++ class MontgomeryMultiplyGenerator : public MacroAssembler { ++ ++ Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn, ++ Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2, Ri, Rj; ++ ++ RegSet _toSave; ++ bool _squaring; ++ ++ public: ++ MontgomeryMultiplyGenerator (Assembler *as, bool squaring) ++ : MacroAssembler(as->code()), _squaring(squaring) { ++ ++ // Register allocation ++ ++ Register reg = c_rarg0; ++ Pa_base = reg; // Argument registers ++ if (squaring) { ++ Pb_base = Pa_base; ++ } else { ++ Pb_base = ++reg; ++ } ++ Pn_base = ++reg; ++ Rlen= ++reg; ++ inv = ++reg; ++ Pm_base = ++reg; ++ ++ // Working registers: ++ Ra = ++reg; // The current digit of a, b, n, and m. ++ Rb = ++reg; ++ Rm = ++reg; ++ Rn = ++reg; ++ ++ Pa = ++reg; // Pointers to the current/next digit of a, b, n, and m. ++ Pb = ++reg; ++ Pm = ++reg; ++ Pn = ++reg; ++ ++ tmp0 = ++reg; // Three registers which form a ++ tmp1 = ++reg; // triple-precision accumuator. ++ tmp2 = ++reg; ++ ++ Ri = x6; // Inner and outer loop indexes. ++ Rj = x7; ++ ++ Rhi_ab = x28; // Product registers: low and high parts ++ Rlo_ab = x29; // of a*b and m*n. ++ Rhi_mn = x30; ++ Rlo_mn = x31; ++ ++ // x18 and up are callee-saved. 
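The step(), post1() and acc() helpers below keep a 192-bit running sum in (tmp2, tmp1, tmp0) and fold 64x64-bit products into it. A self-contained sketch of that multiply-accumulate, the MACC(...) pseudo-op quoted in their comments, using the GCC/Clang __int128 extension; Acc192 and macc are illustrative names, not part of the patch:

#include <cstdint>

struct Acc192 { uint64_t t0, t1, t2; };        // mirrors tmp0, tmp1, tmp2

// (t2:t1:t0) += a * b: the 128-bit product (Rhi:Rlo) is added into the two
// low words and the carry out of that addition is absorbed by the top word,
// which is what acc() does with its cad/cadc/adc sequence.
static void macc(Acc192& acc, uint64_t a, uint64_t b) {
  unsigned __int128 p   = (unsigned __int128)a * b;
  unsigned __int128 lo  = ((unsigned __int128)acc.t1 << 64) | acc.t0;
  unsigned __int128 sum = lo + p;
  acc.t2 += (uint64_t)(sum < lo);              // carry out of the low 128 bits
  acc.t0 = (uint64_t)sum;
  acc.t1 = (uint64_t)(sum >> 64);
}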
++ _toSave = RegSet::range(x18, reg) + Pm_base; ++ } ++ ++ private: ++ void save_regs() { ++ push_reg(_toSave, sp); ++ } ++ ++ void restore_regs() { ++ pop_reg(_toSave, sp); ++ } ++ ++ template ++ void unroll_2(Register count, T block) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(t0, count, 0x1); ++ bnez(t0, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(); ++ bind(odd); ++ (this->*block)(); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } ++ ++ template ++ void unroll_2(Register count, T block, Register d, Register s, Register tmp) { ++ Label loop, end, odd; ++ beqz(count, end); ++ andi(tmp, count, 0x1); ++ bnez(tmp, odd); ++ align(16); ++ bind(loop); ++ (this->*block)(d, s, tmp); ++ bind(odd); ++ (this->*block)(d, s, tmp); ++ addi(count, count, -2); ++ bgtz(count, loop); ++ bind(end); ++ } ++ ++ void pre1(RegisterOrConstant i) { ++ block_comment("pre1"); ++ // Pa = Pa_base; ++ // Pb = Pb_base + i; ++ // Pm = Pm_base; ++ // Pn = Pn_base + i; ++ // Ra = *Pa; ++ // Rb = *Pb; ++ // Rm = *Pm; ++ // Rn = *Pn; ++ if (i.is_register()) { ++ slli(t0, i.as_register(), LogBytesPerWord); ++ } else { ++ mv(t0, i.as_constant()); ++ slli(t0, t0, LogBytesPerWord); ++ } ++ ++ mv(Pa, Pa_base); ++ add(Pb, Pb_base, t0); ++ mv(Pm, Pm_base); ++ add(Pn, Pn_base, t0); ++ ++ ld(Ra, Address(Pa)); ++ ld(Rb, Address(Pb)); ++ ld(Rm, Address(Pm)); ++ ld(Rn, Address(Pn)); ++ ++ // Zero the m*n result. ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } ++ ++ // The core multiply-accumulate step of a Montgomery ++ // multiplication. The idea is to schedule operations as a ++ // pipeline so that instructions with long latencies (loads and ++ // multiplies) have time to complete before their results are ++ // used. This most benefits in-order implementations of the ++ // architecture but out-of-order ones also benefit. ++ void step() { ++ block_comment("step"); ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ addi(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ addi(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n from the ++ // previous iteration. ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } ++ ++ void post1() { ++ block_comment("post1"); ++ ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); ++ ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); ++ stop("broken Montgomery multiply"); ++ bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. 
So, rather than do a mul and an cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp0, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } ++ ++ void pre2(Register i, Register len) { ++ block_comment("pre2"); ++ // Pa = Pa_base + i-len; ++ // Pb = Pb_base + len; ++ // Pm = Pm_base + i-len; ++ // Pn = Pn_base + len; ++ ++ sub(Rj, i, len); ++ // Rj == i-len ++ ++ // Ra as temp register ++ shadd(Pa, Rj, Pa_base, Ra, LogBytesPerWord); ++ shadd(Pm, Rj, Pm_base, Ra, LogBytesPerWord); ++ shadd(Pb, len, Pb_base, Ra, LogBytesPerWord); ++ shadd(Pn, len, Pn_base, Ra, LogBytesPerWord); ++ ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ add(Pa, Pa, wordSize); ++ ld(Ra, Address(Pa)); ++ add(Pb, Pb, -wordSize); ++ ld(Rb, Address(Pb)); ++ add(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ add(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ ++ mv(Rhi_mn, zr); ++ mv(Rlo_mn, zr); ++ } ++ ++ void post2(Register i, Register len) { ++ block_comment("post2"); ++ sub(Rj, i, len); ++ ++ cad(tmp0, tmp0, Rlo_mn, t0); // The pending m*n, low part ++ ++ // As soon as we know the least significant digit of our result, ++ // store it. ++ // Pm_base[i-len] = tmp0; ++ // Rj as temp register ++ shadd(Rj, Rj, Pm_base, Rj, LogBytesPerWord); ++ sd(tmp0, Address(Rj)); ++ ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ cadc(tmp0, tmp1, Rhi_mn, t0); // The pending m*n, high part ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } ++ ++ // A carry in tmp0 after Montgomery multiplication means that we ++ // should subtract multiples of n from our result in m. We'll ++ // keep doing that until there is no carry. ++ void normalize(Register len) { ++ block_comment("normalize"); ++ // while (tmp0) ++ // tmp0 = sub(Pm_base, Pn_base, tmp0, len); ++ Label loop, post, again; ++ Register cnt = tmp1, i = tmp2; // Re-use registers; we're done with them now ++ beqz(tmp0, post); { ++ bind(again); { ++ mv(i, zr); ++ mv(cnt, len); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ mv(t0, 1); // set carry flag, i.e. no borrow ++ align(16); ++ bind(loop); { ++ notr(Rn, Rn); ++ add(Rm, Rm, t0); ++ add(Rm, Rm, Rn); ++ sltu(t0, Rm, Rn); ++ shadd(Rn, i, Pm_base, Rn, LogBytesPerWord); // Rn as temp register ++ sd(Rm, Address(Rn)); ++ add(i, i, 1); ++ slli(Rn, i, LogBytesPerWord); ++ add(Rm, Pm_base, Rn); ++ ld(Rm, Address(Rm)); ++ add(Rn, Pn_base, Rn); ++ ld(Rn, Address(Rn)); ++ sub(cnt, cnt, 1); ++ } bnez(cnt, loop); ++ addi(tmp0, tmp0, -1); ++ add(tmp0, tmp0, t0); ++ } bnez(tmp0, again); ++ } bind(post); ++ } ++ ++ // Move memory at s to d, reversing words. 
++ // Increments d to end of copied memory ++ // Destroys tmp1, tmp2 ++ // Preserves len ++ // Leaves s pointing to the address which was in d at start ++ void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { ++ assert(tmp1 < x28 && tmp2 < x28, "register corruption"); ++ ++ shadd(s, len, s, tmp1, LogBytesPerWord); ++ mv(tmp1, len); ++ unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); ++ slli(tmp1, len, LogBytesPerWord); ++ sub(s, d, tmp1); ++ } ++ // [63...0] -> [31...0][63...32] ++ void reverse1(Register d, Register s, Register tmp) { ++ addi(s, s, -wordSize); ++ ld(tmp, Address(s)); ++ ror_imm(tmp, tmp, 32, t0); ++ sd(tmp, Address(d)); ++ addi(d, d, wordSize); ++ } ++ ++ void step_squaring() { ++ // An extra ACC ++ step(); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } ++ ++ void last_squaring(Register i) { ++ Label dont; ++ // if ((i & 1) == 0) { ++ andi(t0, i, 0x1); ++ bnez(t0, dont); { ++ // MACC(Ra, Rb, tmp0, tmp1, tmp2); ++ // Ra = *++Pa; ++ // Rb = *--Pb; ++ mulhu(Rhi_ab, Ra, Rb); ++ mul(Rlo_ab, Ra, Rb); ++ acc(Rhi_ab, Rlo_ab, tmp0, tmp1, tmp2); ++ } bind(dont); ++ } ++ ++ void extra_step_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // Rm = *++Pm; ++ // Rn = *--Pn; ++ mulhu(Rhi_mn, Rm, Rn); ++ mul(Rlo_mn, Rm, Rn); ++ addi(Pm, Pm, wordSize); ++ ld(Rm, Address(Pm)); ++ addi(Pn, Pn, -wordSize); ++ ld(Rn, Address(Pn)); ++ } ++ ++ ++ void post1_squaring() { ++ acc(Rhi_mn, Rlo_mn, tmp0, tmp1, tmp2); // The pending m*n ++ ++ // *Pm = Rm = tmp0 * inv; ++ mul(Rm, tmp0, inv); ++ sd(Rm, Address(Pm)); ++ ++ // MACC(Rm, Rn, tmp0, tmp1, tmp2); ++ // tmp0 = tmp1; tmp1 = tmp2; tmp2 = 0; ++ mulhu(Rhi_mn, Rm, Rn); ++ ++#ifndef PRODUCT ++ // assert(m[i] * n[0] + tmp0 == 0, "broken Montgomery multiply"); ++ { ++ mul(Rlo_mn, Rm, Rn); ++ add(Rlo_mn, tmp0, Rlo_mn); ++ Label ok; ++ beqz(Rlo_mn, ok); { ++ stop("broken Montgomery multiply"); ++ } bind(ok); ++ } ++#endif ++ // We have very carefully set things up so that ++ // m[i]*n[0] + tmp0 == 0 (mod b), so we don't have to calculate ++ // the lower half of Rm * Rn because we know the result already: ++ // it must be -tmp0. tmp0 + (-tmp0) must generate a carry iff ++ // tmp0 != 0. So, rather than do a mul and a cad we just set ++ // the carry flag iff tmp0 is nonzero. ++ // ++ // mul(Rlo_mn, Rm, Rn); ++ // cad(zr, tmp0, Rlo_mn); ++ addi(t0, tmp0, -1); ++ sltu(t0, t0, tmp0); // Set carry iff tmp0 is nonzero ++ cadc(tmp0, tmp1, Rhi_mn, t0); ++ adc(tmp1, tmp2, zr, t0); ++ mv(tmp2, zr); ++ } ++ ++ // use t0 as carry ++ void acc(Register Rhi, Register Rlo, ++ Register tmp0, Register tmp1, Register tmp2) { ++ cad(tmp0, tmp0, Rlo, t0); ++ cadc(tmp1, tmp1, Rhi, t0); ++ adc(tmp2, tmp2, zr, t0); ++ } ++ ++ public: ++ /** ++ * Fast Montgomery multiplication. The derivation of the ++ * algorithm is in A Cryptographic Library for the Motorola ++ * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
++ * ++ * Arguments: ++ * ++ * Inputs for multiplication: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements b ++ * c_rarg2 - int array elements n (the modulus) ++ * c_rarg3 - int length ++ * c_rarg4 - int inv ++ * c_rarg5 - int array elements m (the result) ++ * ++ * Inputs for squaring: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_multiply() { ++ Label argh, nothing; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ beqz(Rlen, nothing); ++ ++ enter(); ++ ++ // Make room. ++ mv(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); ++ ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ if (!_squaring) ++ reverse(Ra, Pb_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); ++ ++#ifndef PRODUCT ++ // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ { ++ ld(Rn, Address(Pn_base)); ++ mul(Rlo_mn, Rn, inv); ++ mv(t0, -1); ++ Label ok; ++ beq(Rlo_mn, t0, ok); ++ stop("broken inverse in Montgomery multiply"); ++ bind(ok); ++ } ++#endif ++ ++ mv(Pm_base, Ra); ++ ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bge(Ri, Rlen, end); ++ ++ bind(loop); ++ pre1(Ri); ++ ++ block_comment(" for (j = i; j; j--) {"); { ++ mv(Rj, Ri); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); ++ ++ post1(); ++ addw(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ slli(Rj, Rlen, 1); // Rj as temp register ++ bge(Ri, Rj, end); ++ ++ bind(loop); ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = len*2-i-1; j; j--) {"); { ++ slliw(Rj, Rlen, 1); ++ subw(Rj, Rj, Ri); ++ subw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addw(Ri, Ri, 1); ++ slli(Rj, Rlen, 1); ++ blt(Ri, Rj, loop); ++ bind(end); ++ } ++ block_comment("} // i"); ++ ++ ++ normalize(Rlen); ++ ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ ++ leave(); ++ bind(nothing); ++ ret(); ++ ++ return entry; ++ } ++ ++ /** ++ * ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int array elements a ++ * c_rarg1 - int array elements n (the modulus) ++ * c_rarg2 - int length ++ * c_rarg3 - int inv ++ * c_rarg4 - int array elements m (the result) ++ * ++ */ ++ address generate_square() { ++ Label argh; ++ bind(argh); ++ stop("MontgomeryMultiply total_allocation must be <= 8192"); ++ ++ align(CodeEntryAlignment); ++ address entry = pc(); ++ ++ enter(); ++ ++ // Make room. 
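The "Make room" sequence that follows, like the one in generate_multiply() above, bails out to "argh" when the int length exceeds 512 and then reserves len << exact_log2(4 * sizeof(jint)) bytes of stack for the reversed copies of the inputs and the result. A small standalone check of that bound, assuming jint is a 4-byte type as in HotSpot; montgomery_scratch_bytes is an illustrative name, not part of the patch:

#include <cstddef>
#include <cstdint>

static size_t montgomery_scratch_bytes(size_t len_in_ints) {
  return len_in_ints << 4;             // slli(Ra, Rlen, exact_log2(16))
}
// 512 ints * 16 bytes of scratch per int is the "total_allocation must be
// <= 8192" limit printed by the argh stop message.
static_assert(512 * 4 * sizeof(int32_t) == 8192, "Montgomery scratch bound");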
++ mv(Ra, 512); ++ bgt(Rlen, Ra, argh); ++ slli(Ra, Rlen, exact_log2(4 * sizeof(jint))); ++ sub(Ra, sp, Ra); ++ andi(sp, Ra, -2 * wordSize); ++ ++ srliw(Rlen, Rlen, 1); // length in longwords = len/2 ++ ++ { ++ // Copy input args, reversing as we go. We use Ra as a ++ // temporary variable. ++ reverse(Ra, Pa_base, Rlen, Ri, Rj); ++ reverse(Ra, Pn_base, Rlen, Ri, Rj); ++ } ++ ++ // Push all call-saved registers and also Pm_base which we'll need ++ // at the end. ++ save_regs(); ++ ++ mv(Pm_base, Ra); ++ ++ mv(tmp0, zr); ++ mv(tmp1, zr); ++ mv(tmp2, zr); ++ ++ block_comment("for (int i = 0; i < len; i++) {"); ++ mv(Ri, zr); { ++ Label loop, end; ++ bind(loop); ++ bge(Ri, Rlen, end); ++ ++ pre1(Ri); ++ ++ block_comment("for (j = (i+1)/2; j; j--) {"); { ++ addi(Rj, Ri, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); ++ ++ last_squaring(Ri); ++ ++ block_comment(" for (j = i/2; j; j--) {"); { ++ srliw(Rj, Ri, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); ++ ++ post1_squaring(); ++ addi(Ri, Ri, 1); ++ blt(Ri, Rlen, loop); ++ ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ block_comment("for (int i = len; i < 2*len; i++) {"); ++ mv(Ri, Rlen); { ++ Label loop, end; ++ bind(loop); ++ slli(Rj, Rlen, 1); ++ bge(Ri, Rj, end); ++ ++ pre2(Ri, Rlen); ++ ++ block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ sub(Rj, Rj, 1); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); ++ } block_comment(" } // j"); ++ ++ last_squaring(Ri); ++ ++ block_comment(" for (j = (2*len-i)/2; j; j--) {"); { ++ slli(Rj, Rlen, 1); ++ sub(Rj, Rj, Ri); ++ srliw(Rj, Rj, 1); ++ unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); ++ } block_comment(" } // j"); ++ ++ post2(Ri, Rlen); ++ addi(Ri, Ri, 1); ++ slli(t0, Rlen, 1); ++ blt(Ri, t0, loop); ++ ++ bind(end); ++ block_comment("} // i"); ++ } ++ ++ normalize(Rlen); ++ ++ mv(Ra, Pm_base); // Save Pm_base in Ra ++ restore_regs(); // Restore caller's Pm_base ++ ++ // Copy our result into caller's Pm_base ++ reverse(Pm_base, Ra, Rlen, Ri, Rj); ++ ++ leave(); ++ ret(); ++ ++ return entry; ++ } ++ }; ++#endif // COMPILER2 ++ ++ // Initialization ++ void generate_initial() { ++ // Generate initial stubs and initializes the entry points ++ ++ // entry points that exist in all platforms Note: This is code ++ // that could be shared among different platforms - however the ++ // benefit seems to be smaller than the disadvantage of having a ++ // much more complicated generator structure. See also comment in ++ // stubRoutines.hpp. ++ ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ ++ StubRoutines::_call_stub_entry = ++ generate_call_stub(StubRoutines::_call_stub_return_address); ++ ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ // Build this early so it's available for the interpreter. 
++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_StackOverflowError)); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime::throw_delayed_StackOverflowError)); ++ } ++ ++ void generate_all() { ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++ StubRoutines::_throw_AbstractMethodError_entry = ++ generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_AbstractMethodError)); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = ++ generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_IncompatibleClassChangeError)); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = ++ generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_NullPointerException_at_call)); ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++ ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ ++ if (UseMultiplyToLenIntrinsic) { ++ StubRoutines::_multiplyToLen = generate_multiplyToLen(); ++ } ++ ++ if (UseSquareToLenIntrinsic) { ++ StubRoutines::_squareToLen = generate_squareToLen(); ++ } ++ ++ generate_compare_long_strings(); ++ ++ generate_string_indexof_stubs(); ++ ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); ++ StubRoutines::_montgomeryMultiply = g.generate_multiply(); ++ } ++ ++ if (UseMontgomerySquareIntrinsic) { ++ StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); ++ MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); ++ StubRoutines::_montgomerySquare = g.generate_square(); ++ } ++#endif // COMPILER2 ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++ StubRoutines::riscv::set_completed(); ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++ ++ ~StubGenerator() {} ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +new file mode 100644 +index 000000000..633108b95 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.cpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/globalDefinitions.hpp" ++ ++// Implementation of the platform-specific part of StubRoutines - for ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++address StubRoutines::riscv::_get_previous_fp_entry = NULL; ++address StubRoutines::riscv::_get_previous_sp_entry = NULL; ++ ++address StubRoutines::riscv::_f2i_fixup = NULL; ++address StubRoutines::riscv::_f2l_fixup = NULL; ++address StubRoutines::riscv::_d2i_fixup = NULL; ++address StubRoutines::riscv::_d2l_fixup = NULL; ++address StubRoutines::riscv::_float_sign_mask = NULL; ++address StubRoutines::riscv::_float_sign_flip = NULL; ++address StubRoutines::riscv::_double_sign_mask = NULL; ++address StubRoutines::riscv::_double_sign_flip = NULL; ++address StubRoutines::riscv::_zero_blocks = NULL; ++address StubRoutines::riscv::_has_negatives = NULL; ++address StubRoutines::riscv::_has_negatives_long = NULL; ++address StubRoutines::riscv::_compare_long_string_LL = NULL; ++address StubRoutines::riscv::_compare_long_string_UU = NULL; ++address StubRoutines::riscv::_compare_long_string_LU = NULL; ++address StubRoutines::riscv::_compare_long_string_UL = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ll = NULL; ++address StubRoutines::riscv::_string_indexof_linear_uu = NULL; ++address StubRoutines::riscv::_string_indexof_linear_ul = NULL; ++address StubRoutines::riscv::_large_byte_array_inflate = NULL; ++ ++bool StubRoutines::riscv::_completed = false; +diff --git a/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +new file mode 100644 +index 000000000..8aa81980e +--- /dev/null ++++ b/src/hotspot/cpu/riscv/stubRoutines_riscv.hpp +@@ -0,0 +1,179 @@ ++/* ++ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_STUBROUTINES_RISCV_HPP ++#define CPU_RISCV_STUBROUTINES_RISCV_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc) { ++ return return_pc == _call_stub_return_address; ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 19000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 36000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class riscv { ++ friend class StubGenerator; ++ ++ private: ++ static address _get_previous_fp_entry; ++ static address _get_previous_sp_entry; ++ ++ static address _f2i_fixup; ++ static address _f2l_fixup; ++ static address _d2i_fixup; ++ static address _d2l_fixup; ++ ++ static address _float_sign_mask; ++ static address _float_sign_flip; ++ static address _double_sign_mask; ++ static address _double_sign_flip; ++ ++ static address _zero_blocks; ++ ++ static address _has_negatives; ++ static address _has_negatives_long; ++ static address _compare_long_string_LL; ++ static address _compare_long_string_LU; ++ static address _compare_long_string_UL; ++ static address _compare_long_string_UU; ++ static address _string_indexof_linear_ll; ++ static address _string_indexof_linear_uu; ++ static address _string_indexof_linear_ul; ++ static address _large_byte_array_inflate; ++ static bool _completed; ++ ++ public: ++ ++ static address get_previous_fp_entry() ++ { ++ return _get_previous_fp_entry; ++ } ++ ++ static address get_previous_sp_entry() ++ { ++ return _get_previous_sp_entry; ++ } ++ ++ static address f2i_fixup() ++ { ++ return _f2i_fixup; ++ } ++ ++ static address f2l_fixup() ++ { ++ return _f2l_fixup; ++ } ++ ++ static address d2i_fixup() ++ { ++ return _d2i_fixup; ++ } ++ ++ static address d2l_fixup() ++ { ++ return _d2l_fixup; ++ } ++ ++ static address float_sign_mask() ++ { ++ return _float_sign_mask; ++ } ++ ++ static address float_sign_flip() ++ { ++ return _float_sign_flip; ++ } ++ ++ static address double_sign_mask() ++ { ++ return _double_sign_mask; ++ } ++ ++ static address double_sign_flip() ++ { ++ return _double_sign_flip; ++ } ++ ++ static address zero_blocks() { ++ return _zero_blocks; ++ } ++ ++ static address has_negatives() { ++ return _has_negatives; ++ } ++ ++ static address has_negatives_long() { ++ return _has_negatives_long; ++ } ++ ++ static address compare_long_string_LL() { ++ return _compare_long_string_LL; ++ } ++ ++ static address compare_long_string_LU() { ++ return _compare_long_string_LU; ++ } ++ ++ static address compare_long_string_UL() { ++ return _compare_long_string_UL; ++ } ++ ++ static address compare_long_string_UU() { ++ return _compare_long_string_UU; ++ } ++ ++ static address string_indexof_linear_ul() { ++ return _string_indexof_linear_ul; ++ } ++ ++ static address string_indexof_linear_ll() { ++ return 
_string_indexof_linear_ll; ++ } ++ ++ static address string_indexof_linear_uu() { ++ return _string_indexof_linear_uu; ++ } ++ ++ static address large_byte_array_inflate() { ++ return _large_byte_array_inflate; ++ } ++ ++ static bool complete() { ++ return _completed; ++ } ++ ++ static void set_completed() { ++ _completed = true; ++ } ++}; ++ ++#endif // CPU_RISCV_STUBROUTINES_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +new file mode 100644 +index 000000000..f5e212204 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +@@ -0,0 +1,1841 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/javaClasses.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/bytecodeTracer.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/macros.hpp" ++#include ++ ++#ifndef PRODUCT ++#include "oops/method.hpp" ++#endif // !PRODUCT ++ ++// Size of interpreter code. Increase if too small. Interpreter will ++// fail with a guarantee ("not enough space for interpreter generation"); ++// if too small. ++// Run with +PrintInterpreter to get the VM to print out the size. 
++// Max size with JVMTI ++int TemplateInterpreter::InterpreterCodeSize = 256 * 1024; ++ ++#define __ _masm-> ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ __ andi(esp, esp, -16); ++ __ mv(c_rarg3, esp); ++ // xmethod ++ // xlocals ++ // c_rarg3: first stack arg - wordSize ++ // adjust sp ++ ++ __ addi(sp, c_rarg3, -18 * wordSize); ++ __ addi(sp, sp, -2 * wordSize); ++ __ sd(ra, Address(sp, 0)); ++ ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ xmethod, xlocals, c_rarg3); ++ ++ // x10: result handler ++ ++ // Stack layout: ++ // sp: return address <- sp ++ // 1 garbage ++ // 8 integer args (if static first is unused) ++ // 1 float/double identifiers ++ // 8 double args ++ // stack args <- esp ++ // garbage ++ // expression stack bottom ++ // bcp (NULL) ++ // ... ++ ++ // Restore RA ++ __ ld(ra, Address(sp, 0)); ++ __ addi(sp, sp , 2 * wordSize); ++ ++ // Do FP first so we can use c_rarg3 as temp ++ __ lwu(c_rarg3, Address(sp, 9 * wordSize)); // float/double identifiers ++ ++ for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { ++ const FloatRegister r = g_FPArgReg[i]; ++ Label d, done; ++ ++ __ andi(t0, c_rarg3, 1UL << i); ++ __ bnez(t0, d); ++ __ flw(r, Address(sp, (10 + i) * wordSize)); ++ __ j(done); ++ __ bind(d); ++ __ fld(r, Address(sp, (10 + i) * wordSize)); ++ __ bind(done); ++ } ++ ++ // c_rarg0 contains the result from the call of ++ // InterpreterRuntime::slow_signature_handler so we don't touch it ++ // here. It will be loaded with the JNIEnv* later. ++ for (int i = 1; i < Argument::n_int_register_parameters_c; i++) { ++ const Register rm = g_INTArgReg[i]; ++ __ ld(rm, Address(sp, i * wordSize)); ++ } ++ ++ __ addi(sp, sp, 18 * wordSize); ++ __ ret(); ++ ++ return entry; ++} ++ ++// Various method entries ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ // xmethod: Method* ++ // x30: sender sp ++ // esp: args ++ ++ if (!InlineIntrinsics) { ++ return NULL; // Generate a vanilla entry ++ } ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
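Every sin/cos/tan/log/log10/exp/pow case in the switch below repeats one selection pattern: call the architecture-specific stub when one has been generated, otherwise fall back to the shared C++ implementation. A self-contained model of that pattern; stub_dsin, shared_dsin and pick_sin_target are illustrative stand-ins for StubRoutines::dsin() and SharedRuntime::dsin, not HotSpot symbols:

#include <cmath>
#include <cstddef>

typedef double (*math_fn)(double);

static math_fn stub_dsin = NULL;          // stands in for StubRoutines::dsin()
static double shared_dsin(double x) {     // stands in for SharedRuntime::dsin
  return std::sin(x);
}

// Mirrors: fn = (StubRoutines::dsin() == NULL) ? SharedRuntime::dsin
//                                              : StubRoutines::dsin();
static math_fn pick_sin_target() {
  return (stub_dsin == NULL) ? shared_dsin : stub_dsin;
}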
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- esp ++ // [ arg ] ++ // retaddr in ra ++ ++ address fn = NULL; ++ address entry_point = NULL; ++ Register continuation = ra; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fabs_d(f10, f10); ++ __ mv(sp, x30); // Restore caller's SP ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ fsqrt_d(f10, f10); ++ __ mv(sp, x30); ++ break; ++ case Interpreter::java_lang_math_sin : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_cos : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_tan : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_log10 : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp)); ++ __ mv(sp, x30); ++ __ mv(x9, ra); ++ continuation = x9; // The first callee-saved register ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ mv(x9, ra); ++ continuation = x9; ++ __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp)); ++ __ mv(sp, x30); ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, 
StubRoutines::dpow()); ++ } ++ __ mv(t0, fn); ++ __ jalr(t0); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ fld(f10, Address(esp, 4 * Interpreter::stackElementSize)); ++ __ fld(f11, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ fld(f12, Address(esp)); ++ __ fmadd_d(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ flw(f10, Address(esp, 2 * Interpreter::stackElementSize)); ++ __ flw(f11, Address(esp, Interpreter::stackElementSize)); ++ __ flw(f12, Address(esp)); ++ __ fmadd_s(f10, f10, f11, f12); ++ __ mv(sp, x30); // Restore caller's SP ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point != NULL) { ++ __ jr(continuation); ++ } ++ ++ return entry_point; ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ // xmethod: Method* ++ // x30: sender SP ++ ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ ++ // pop return address, reset last_sp to NULL ++ __ empty_expression_stack(); ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_AbstractMethodErrorWithMethod), ++ xmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(t0, Address(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ __ mv(t1, sp); ++ // maximal sp for current fp (stack grows negative) ++ // check if frame is complete ++ __ bge(t0, t1, L); ++ __ stop ("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ ++ // convention: expect aberrant index in register x11 ++ __ zero_extend(c_rarg2, x11, 32); ++ // convention: expect array in register x13 ++ __ mv(c_rarg1, x13); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ArrayIndexOutOfBoundsException), ++ c_rarg1, c_rarg2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // object is at TOS ++ __ pop_reg(c_rarg1); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ClassCastException), ++ c_rarg1); ++ return entry; ++} ++ ++address 
TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ if (pass_oop) { ++ // object is at TOS ++ __ pop_reg(c_rarg2); ++ } ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ la(c_rarg1, Address((address)name)); ++ if (pass_oop) { ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ create_klass_exception), ++ c_rarg1, c_rarg2); ++ } else { ++ // kind of lame ExternalAddress can't take NULL because ++ // external_word_Relocation will assert. ++ if (message != NULL) { ++ __ la(c_rarg2, Address((address)message)); ++ } else { ++ __ mv(c_rarg2, NULL_WORD); ++ } ++ __ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), ++ c_rarg1, c_rarg2); ++ } ++ // throw exception ++ __ j(address(Interpreter::throw_exception_entry())); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that esp is now tos until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ ++ if (state == atos) { ++ Register obj = x10; ++ Register mdp = x11; ++ Register tmp = x12; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ profile_return_type(mdp, obj, tmp); ++ } ++ ++ // Pop N words from the stack ++ __ get_cache_and_index_at_bcp(x11, x12, 1, index_size); ++ __ ld(x11, Address(x11, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(x11, x11, ConstantPoolCacheEntry::parameter_size_mask); ++ ++ __ shadd(esp, x11, esp, t0, 3); ++ ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, ++ Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); ++ ++ __ check_and_handle_popframe(xthread); ++ __ check_and_handle_earlyret(xthread); ++ ++ __ get_dispatch(); ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); ++ ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ addi(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); ++ ++ // Restore expression stack pointer ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // NULL last_sp until next java call ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ 
++ // handle exceptions ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ } ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { ++ address entry = __ pc(); ++ if (type == T_OBJECT) { ++ // retrieve result from frame ++ __ ld(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // and verify it ++ __ verify_oop(x10); ++ } else { ++ __ cast_primitive_type(type, x10); ++ } ++ ++ __ ret(); // return from result handler ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, ++ address runtime_entry) { ++ assert_cond(runtime_entry != NULL); ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ membar(MacroAssembler::AnyAny); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// xmethod: method ++// ++void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ld(x10, Address(xmethod, Method::method_data_offset())); ++ __ beqz(x10, no_mdo); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(x10, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x10, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, t0, t1, false, overflow); ++ __ j(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(xmethod, t1, done); ++ const Address mask(t1, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, t0, x11, false, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(t1, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(t1, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(xmethod, t1, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into MethodData* ++ __ lwu(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ __ addw(x11, x11, 1); ++ __ sw(x11, Address(t1, MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lwu(x11, invocation_counter); ++ __ lwu(x10, backedge_counter); ++ ++ __ addw(x11, x11, InvocationCounter::count_increment); ++ __ andi(x10, x10, InvocationCounter::count_mask_value); ++ ++ __ sw(x11, invocation_counter); ++ __ addw(x10, x10, x11); // add both counters ++ ++ // profile_method is non-null only for interpreted method so ++ // profile_method != NULL == !native_call ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t1, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(t1, *profile_method); ++ } ++ ++ { ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(t1, Address(t1, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ bltu(x10, t1, done); ++ __ j(*overflow); // offset is too large so we have to use j instead of bgeu here ++ } ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ __ mv(c_rarg1, zr); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), c_rarg1); ++ __ j(do_continue); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead ++// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError ++// without going through the signal handler, i.e., reserved and yellow zones ++// will not be made usable. The shadow zone must suffice to handle the ++// overflow. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// x13: number of additional locals this frame needs (what we must check) ++// xmethod: Method* ++// ++// Kills: ++// x10 ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ ++ // monitor entry size: see picture of stack set ++ // (generate_method_entry) and frame_amd64.hpp ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp through expr stack ++ // bottom). be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ mv(t0, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ bleu(x13, t0, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++ ++ // locals + overhead, in bytes ++ __ mv(x10, overhead_size); ++ __ shadd(x10, x13, x10, t0, Interpreter::logStackElementSize); // 2 slots per parameter. ++ ++ const Address stack_limit(xthread, JavaThread::stack_overflow_limit_offset()); ++ __ ld(t0, stack_limit); ++ ++#ifdef ASSERT ++ Label limit_okay; ++ // Verify that thread stack limit is non-zero. ++ __ bnez(t0, limit_okay); ++ __ stop("stack overflow limit is zero"); ++ __ bind(limit_okay); ++#endif ++ ++ // Add stack limit to locals. ++ __ add(x10, x10, t0); ++ ++ // Check against the current stack bottom. ++ __ bgtu(sp, x10, after_frame_check); ++ ++ // Remove the incoming args, peeling the machine SP back to where it ++ // was in the caller. This is not strictly necessary, but unless we ++ // do so the stack frame may have a garbage FP; this ensures a ++ // correct call stack that we can always unwind. The ANDI should be ++ // unnecessary because the sender SP in x30 is always aligned, but ++ // it doesn't hurt. ++ __ andi(sp, x30, -16); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// ++// Args: ++// xmethod: Method* ++// xlocals: locals ++// ++// Kills: ++// x10 ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) ++// t0, t1 (temporary regs) ++void TemplateInterpreterGenerator::lock_method() { ++ // synchronize method ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address monitor_block_top(fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method doesn't need synchronization", false); ++#endif // ASSERT ++ ++ // get synchronization object ++ { ++ Label done; ++ __ lwu(x10, access_flags); ++ __ andi(t0, x10, JVM_ACC_STATIC); ++ // get receiver (assume this is frequent case) ++ __ ld(x10, Address(xlocals, Interpreter::local_offset_in_bytes(0))); ++ __ beqz(t0, done); ++ __ load_mirror(x10, xmethod); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ bnez(x10, L); ++ __ stop("synchronization object is NULL"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ ++ __ bind(done); ++ } ++ ++ // add space for monitor & lock ++ __ add(sp, sp, - entry_size); // add space for a monitor entry ++ __ add(esp, esp, - entry_size); ++ __ mv(t0, esp); ++ __ sd(t0, monitor_block_top); // set new monitor block top ++ // store object ++ __ sd(x10, Address(esp, BasicObjectLock::obj_offset_in_bytes())); ++ __ mv(c_rarg1, esp); // object address ++ __ lock_object(c_rarg1); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++// ++// Args: ++// ra: return address ++// xmethod: Method* ++// xlocals: pointer to locals ++// xcpool: cp cache ++// stack_pointer: previous sp ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ // initialize fixed part of activation frame ++ if (native_call) { ++ __ add(esp, sp, - 14 * wordSize); ++ __ mv(xbcp, zr); ++ __ add(sp, sp, - 14 * wordSize); ++ // add 2 zero-initialized slots for native calls ++ __ sd(zr, Address(sp, 13 * wordSize)); ++ __ sd(zr, Address(sp, 12 * wordSize)); ++ } else { ++ __ add(esp, sp, - 12 * wordSize); ++ __ ld(t0, Address(xmethod, Method::const_offset())); // get ConstMethod ++ __ add(xbcp, t0, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ add(sp, sp, - 12 * wordSize); ++ } ++ __ sd(xbcp, Address(sp, wordSize)); ++ __ sd(esp, Address(sp, 0)); ++ ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(t0, Address(xmethod, Method::method_data_offset())); ++ __ beqz(t0, method_data_continue); ++ __ la(t0, Address(t0, in_bytes(MethodData::data_offset()))); ++ __ bind(method_data_continue); ++ } ++ ++ __ sd(xmethod, Address(sp, 7 * wordSize)); ++ __ sd(ProfileInterpreter ? 
t0 : zr, Address(sp, 6 * wordSize)); ++ ++ // Get mirror and store it in the frame as GC root for this Method* ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ __ load_mirror(x28, xmethod); ++ __ sd(x28, Address(sp, 4 * wordSize)); ++ } else ++#endif ++ { ++ __ load_mirror(t0, xmethod); ++ __ sd(t0, Address(sp, 4 * wordSize)); ++ } ++ __ sd(zr, Address(sp, 5 * wordSize)); ++ ++ __ load_constant_pool_cache(xcpool, xmethod); ++ __ sd(xcpool, Address(sp, 3 * wordSize)); ++ __ sd(xlocals, Address(sp, 2 * wordSize)); ++ ++ __ sd(ra, Address(sp, 11 * wordSize)); ++ __ sd(fp, Address(sp, 10 * wordSize)); ++ __ la(fp, Address(sp, 12 * wordSize)); // include ra & fp ++ ++ // set sender sp ++ // leave last_sp as null ++ __ sd(x30, Address(sp, 9 * wordSize)); ++ __ sd(zr, Address(sp, 8 * wordSize)); ++ ++ // Move SP out of the way ++ if (!native_call) { ++ __ load_max_stack(t0, xmethod); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 2); ++ __ slli(t0, t0, 3); ++ __ sub(t0, sp, t0); ++ __ andi(sp, t0, -16); ++ } ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code for G1 (or any SATB based GC), ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * In the G1 code we do not check whether we need to block for ++ // a safepoint. If G1 is enabled then we must execute the specialized ++ // code for Reference.get (except when the Reference object is null) ++ // so that we can log the value in the referent field with an SATB ++ // update buffer. ++ // If the code for the getfield template is modified so that the ++ // G1 pre-barrier code is executed when the current method is ++ // Reference.get() then going through the normal method entry ++ // will be fine. ++ // * The G1 code can, however, check the receiver object (the instance ++ // of java.lang.Reference) and jump to the slow path if null. If the ++ // Reference object is null then we obviously cannot fetch the referent ++ // and so we don't need to call the G1 pre-barrier. Thus we can use the ++ // regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // xmethod: Method* ++ // x30: senderSP must preserve for slow path, set SP to it on fast path ++ ++ // RA is live. It must be saved around calls. ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ ++ Label slow_path; ++ const Register local_0 = c_rarg0; ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ld(local_0, Address(esp, 0)); ++ __ beqz(local_0, slow_path); ++ ++ __ mv(x9, x30); // Move senderSP to a callee-saved register ++ ++ // Load the value of the referent field. 
++  const Address field_address(local_0, referent_offset);
++  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
++  bs->load_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ t1, /*tmp2*/ t0);
++
++  // areturn
++  __ andi(sp, x9, -16);  // done with stack
++  __ ret();
++
++  // generate a vanilla interpreter entry as the slow path
++  __ bind(slow_path);
++  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
++  return entry;
++}
++
++/**
++ * Method entry for static native methods:
++ * int java.util.zip.CRC32.update(int crc, int b)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
++  // TODO: Unimplemented generate_CRC32_update_entry
++  return 0;
++}
++
++/**
++ * Method entry for static native methods:
++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
++ */
++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
++  // TODO: Unimplemented generate_CRC32_updateBytes_entry
++  return 0;
++}
++
++/**
++ * Method entry for intrinsic-candidate (non-native) methods:
++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
++ * Unlike CRC32, CRC32C does not have any methods marked as native
++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses
++ */
++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
++  // TODO: Unimplemented generate_CRC32C_updateBytes_entry
++  return 0;
++}
++
++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
++  // Bang each page in the shadow zone. We can't assume it's been done for
++  // an interpreter frame with greater than a page of locals, so each page
++  // needs to be checked. Only true for non-native.
++  if (UseStackBanging) {
++    const int n_shadow_pages = checked_cast<int>(JavaThread::stack_shadow_zone_size()) / os::vm_page_size();
++    const int start_page = native_call ? n_shadow_pages : 1;
++    const int page_size = os::vm_page_size();
++    for (int pages = start_page; pages <= n_shadow_pages ; pages++) {
++      __ sub(t1, sp, pages * page_size);
++      __ sd(zr, Address(t1));
++    }
++  }
++}
++
++// Interpreter stub for calling a native method. (asm interpreter)
++// This sets up a somewhat different looking stack for calling the
++// native method than the typical interpreter frame setup.
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
++  // determine code generation flags
++  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
++
++  // x11: Method*
++  // x30: sender sp
++
++  address entry_point = __ pc();
++
++  const Address constMethod       (xmethod, Method::const_offset());
++  const Address access_flags      (xmethod, Method::access_flags_offset());
++  const Address size_of_parameters(x12, ConstMethod::
++                                        size_of_parameters_offset());
++
++  // get parameter size (always needed)
++  __ ld(x12, constMethod);
++  __ load_unsigned_short(x12, size_of_parameters);
++
++  // Native calls don't need the stack size check since they have no
++  // expression stack and the arguments are already on the stack and
++  // we only add a handful of words to the stack.
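++
++  // Outline of the native call sequence generated below (a summary of the
++  // code that follows): compute xlocals and build the fixed frame, run the
++  // signature handler to shuffle the Java arguments into the C ABI
++  // registers, pass the JNIEnv* in c_rarg0 (and a mirror handle in c_rarg1
++  // for static methods), switch the thread state from _thread_in_Java to
++  // _thread_in_native, call the native function through x28, then
++  // transition back via _thread_in_native_trans with a safepoint/suspend
++  // check, unbox a potential oop result, unlock a synchronized method if
++  // necessary and return through the result handler kept in x19.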
++ ++ // xmethod: Method* ++ // x12: size of parameters ++ // x30: sender sp ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, xlocals, 3); ++ __ addi(xlocals, xlocals, -wordSize); ++ ++ // Pull SP back to minimum size: this avoids holes in the stack ++ __ andi(sp, esp, -16); ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute non-native method as native", false); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif ++ ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. ++ ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif ++ } ++ ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register t = x18; ++ const Register result_handler = x19; ++ ++ // allocate space for parameters ++ __ ld(t, Address(xmethod, Method::const_offset())); ++ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); ++ ++ __ slli(t, t, Interpreter::logStackElementSize); ++ __ sub(x30, esp, t); ++ __ andi(sp, x30, -16); ++ __ mv(esp, x30); ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bnez(t, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ ld(t, Address(xmethod, Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == xlocals, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t0, ++ "adjust this code"); ++ ++ // The generated handlers do not touch xmethod (the method). ++ // However, large signatures cannot be cached and are generated ++ // each time here. The slow-path generator can do a GC on return, ++ // so we must reload it after the call. 
++ __ jalr(t); ++ __ get_method(xmethod); // slow path can do a GC, reload xmethod ++ ++ ++ // result handler is in x10 ++ // set result handler ++ __ mv(result_handler, x10); ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_STATIC); ++ __ beqz(t0, L); ++ // get mirror ++ __ load_mirror(t, xmethod); ++ // copy mirror into activation frame ++ __ sd(t, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // pass handle to mirror ++ __ addi(c_rarg1, fp, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ bind(L); ++ } ++ ++ // get native function entry point in x28 ++ { ++ Label L; ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ mv(t1, unsatisfied); ++ __ ld(t1, t1); ++ __ bne(x28, t1, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ xmethod); ++ __ get_method(xmethod); ++ __ ld(x28, Address(xmethod, Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ __ add(c_rarg0, xthread, in_bytes(JavaThread::jni_environment_offset())); ++ ++ // It is enough that the pc() points into the right code ++ // segment. It does not have to be the correct return pc. ++ Label native_return; ++ __ set_last_Java_frame(esp, fp, native_return, x30); ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ lwu(t, Address(xthread, JavaThread::thread_state_offset())); ++ __ addi(t0, zr, (u1)_thread_in_Java); ++ __ beq(t, t0, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ // Change state to native ++ __ la(t1, Address(xthread, JavaThread::thread_state_offset())); ++ __ mv(t0, _thread_in_native); ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ __ sw(t0, Address(t1)); ++ ++ // Call the native method. ++ __ jalr(x28); ++ __ bind(native_return); ++ __ get_method(xmethod); ++ // result potentially in x10 or f10 ++ ++ // make room for the pushes we're about to do ++ __ sub(t0, esp, 4 * wordSize); ++ __ andi(sp, t0, -16); ++ ++ // NOTE: The order of these pushes is known to frame::interpreter_frame_result ++ // in order to extract the result of a method call. If the order of these ++ // pushes change or anything else is added to the stack then the code in ++ // interpreter_frame_result must also change. ++ __ push(dtos); ++ __ push(ltos); ++ ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ ++ __ mv(t0, _thread_in_native_trans); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ if (os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ membar(MacroAssembler::AnyAny); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(xthread, t0, t1); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label L, Continue; ++ __ safepoint_poll_acquire(L); ++ __ lwu(t1, Address(xthread, JavaThread::suspend_flags_offset())); ++ __ beqz(t1, Continue); ++ __ bind(L); ++ ++ // Don't use call_VM as it will see a possible pending exception ++ // and forward it and never return here preventing us from ++ // clearing _last_native_pc down below. So we do a runtime call by ++ // hand. ++ // ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); ++ __ jalr(t1); ++ __ get_method(xmethod); ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ // Force all preceding writes to be observed prior to thread state change ++ __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore); ++ ++ __ mv(t0, _thread_in_Java); ++ __ sw(t0, Address(xthread, JavaThread::thread_state_offset())); ++ ++ // reset_last_Java_frame ++ __ reset_last_Java_frame(true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, Address(xthread, JavaThread::active_handles_offset())); ++ __ sd(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); ++ ++ // If result is an oop unbox and store it in frame where gc will see it ++ // and result handler will pick it up ++ ++ { ++ Label no_oop, not_weak, store_result; ++ __ la(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); ++ __ bne(t, result_handler, no_oop); ++ // Unbox oop result, e.g. JNIHandles::resolve result. ++ __ pop(ltos); ++ __ resolve_jobject(x10, xthread, t); ++ __ sd(x10, Address(fp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ ++ { ++ Label no_reguard; ++ __ lwu(t0, Address(xthread, in_bytes(JavaThread::stack_guard_state_offset()))); ++ __ addi(t1, zr, JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t0, t1, no_reguard); ++ ++ __ push_call_clobbered_registers(); ++ __ mv(c_rarg0, xthread); ++ __ mv(t1, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); ++ __ jalr(t1); ++ __ pop_call_clobbered_registers(); ++ __ bind(no_reguard); ++ } ++ ++ // The method register is junk from after the thread_in_native transition ++ // until here. Also can't call_VM until the bcp has been ++ // restored. Need bcp for throwing exception below so get it now. ++ __ get_method(xmethod); ++ ++ // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> ++ // xbcp == code_base() ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); // get ConstMethod* ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); // get codebase ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t0, Address(xthread, Thread::pending_exception_offset())); ++ __ beqz(t0, L); ++ // Note: At some point we may want to unify this with the code ++ // used in call_VM_base(); i.e., we should use the ++ // StubRoutines::forward_exception code. For now this doesn't work ++ // here because the sp is not correctly set at this point. 
++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lwu(t, Address(xmethod, Method::access_flags_offset())); ++ __ andi(t0, t, JVM_ACC_SYNCHRONIZED); ++ __ beqz(t0, L); ++ // the code below should be shared with interpreter macro ++ // assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object ++ // has not been unlocked by an explicit monitorexit bytecode. ++ ++ // monitor expect in c_rarg1 for slow unlock path ++ __ la(c_rarg1, Address(fp, // address of first monitor ++ (intptr_t)(frame::interpreter_frame_initial_sp_offset * ++ wordSize - sizeof(BasicObjectLock)))); ++ ++ __ ld(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg1); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ jalr(result_handler); ++ ++ // remove activation ++ __ ld(esp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp ++ // remove frame anchor ++ __ leave(); ++ ++ // restore sender sp ++ __ mv(sp, esp); ++ ++ __ ret(); ++ ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { ++ ++ // determine code generation flags ++ const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // t0: sender sp ++ address entry_point = __ pc(); ++ ++ const Address constMethod(xmethod, Method::const_offset()); ++ const Address access_flags(xmethod, Method::access_flags_offset()); ++ const Address size_of_parameters(x13, ++ ConstMethod::size_of_parameters_offset()); ++ const Address size_of_locals(x13, ConstMethod::size_of_locals_offset()); ++ ++ // get parameter size (always needed) ++ // need to load the const method first ++ __ ld(x13, constMethod); ++ __ load_unsigned_short(x12, size_of_parameters); ++ ++ // x12: size of parameters ++ ++ __ load_unsigned_short(x13, size_of_locals); // get size of locals in words ++ __ sub(x13, x13, x12); // x13 = no. of additional locals ++ ++ // see if we've got enough room on the stack for locals plus overhead. ++ generate_stack_overflow_check(); ++ ++ // compute beginning of parameters (xlocals) ++ __ shadd(xlocals, x12, esp, t1, 3); ++ __ add(xlocals, xlocals, -wordSize); ++ ++ // Make room for additional locals ++ __ slli(t1, x13, 3); ++ __ sub(t0, esp, t1); ++ ++ // Padding between locals and fixed part of activation frame to ensure ++ // SP is always 16-byte aligned. 
++ __ andi(sp, t0, -16); ++ ++ // x13 - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ blez(x13, exit); // do nothing if x13 <= 0 ++ __ bind(loop); ++ __ sd(zr, Address(t0)); ++ __ add(t0, t0, wordSize); ++ __ add(x13, x13, -1); // until everything initialized ++ __ bnez(x13, loop); ++ __ bind(exit); ++ } ++ ++ // And the base dispatch table ++ __ get_dispatch(); ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(false); ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_NATIVE, "tried to execute native method as non-native"); ++ __ verify_access_flags(x10, JVM_ACC_ABSTRACT, "tried to execute abstract method in interpreter"); ++#endif ++ ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. ++ ++ const Address do_not_unlock_if_synchronized(xthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ mv(t1, true); ++ __ sb(t1, do_not_unlock_if_synchronized); ++ ++ Label no_mdp; ++ const Register mdp = x13; ++ __ ld(mdp, Address(xmethod, Method::method_data_offset())); ++ __ beqz(mdp, no_mdp); ++ __ add(mdp, mdp, in_bytes(MethodData::data_offset())); ++ __ profile_parameters_type(mdp, x11, x12, x14); // use x11, x12, x14 as tmp registers ++ __ bind(no_mdp); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ sb(zr, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. 
++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ __ lwu(x10, access_flags); ++ __ verify_access_flags(x10, JVM_ACC_SYNCHRONIZED, "method needs synchronization"); ++#endif ++ } ++ ++ // start execution ++#ifdef ASSERT ++ __ verify_frame_setup(); ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ // don't think we need this ++ __ get_method(x11); ++ __ jal(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // x10: exception ++ // x13: return address/pc that threw exception ++ __ restore_bcp(); // xbcp points to call/send ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ reinit_heapbase(); // restore xheapbase as heapbase. ++ __ get_dispatch(); ++ ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // If we came here via a NullPointerException on the receiver of a ++ // method, xthread may be corrupt. ++ __ get_method(xmethod); ++ // expression stack is undefined here ++ // x10: exception ++ // xbcp: exception bcp ++ __ verify_oop(x10); ++ __ mv(c_rarg1, x10); ++ ++ // expression stack must be empty before entering the VM in case of ++ // an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ call_VM(x13, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::exception_handler_for_exception), ++ c_rarg1); ++ ++ // Calculate stack limit ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slli(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); ++ ++ // x10: exception handler entry point ++ // x13: preserved exception oop ++ // xbcp: bcp for exception handler ++ __ push_ptr(x13); // push exception which is now the only value on the stack ++ __ jr(x10); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is ++ // removed and the exception is rethrown (i.e. exception ++ // continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction ++ // which caused the exception and the expression stack is ++ // empty. 
Thus, for any VM calls at this point, GC will find a legal ++ // oop map (with empty expression stack). ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition ++ // indicating that we are currently handling popframe, so that ++ // call_VMs that may happen later do not trigger new popframe ++ // handling cycles. ++ __ lwu(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ __ ori(x13, x13, JavaThread::popframe_processing_bit); ++ __ sw(x13, Address(xthread, JavaThread::popframe_condition_offset())); ++ ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(c_rarg1, Address(fp, frame::return_addr_offset * wordSize)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), c_rarg1); ++ __ bnez(x10, caller_not_deoptimized); ++ ++ // Compute size of arguments for saving when returning to ++ // deoptimized caller ++ __ get_method(x10); ++ __ ld(x10, Address(x10, Method::const_offset())); ++ __ load_unsigned_short(x10, Address(x10, in_bytes(ConstMethod:: ++ size_of_parameters_offset()))); ++ __ slli(x10, x10, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ sub(xlocals, xlocals, x10); ++ __ add(xlocals, xlocals, wordSize); ++ // Save these arguments ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ Deoptimization:: ++ popframe_preserve_args), ++ xthread, x10, xlocals); ++ ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Inform deoptimization that it is responsible for restoring ++ // these arguments ++ __ mv(t0, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(t0, Address(xthread, JavaThread::popframe_condition_offset())); ++ ++ // Continue in deoptimization handler ++ __ ret(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++ ++ __ remove_activation(vtos, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Restore the last_sp and null it out ++ __ ld(esp, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ restore_constant_pool_cache(); ++ __ get_method(xmethod); ++ __ get_dispatch(); ++ ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. 
++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++ ++ // Clear the popframe condition flag ++ __ sw(zr, Address(xthread, JavaThread::popframe_condition_offset())); ++ assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(t0, Address(xbcp, 0)); ++ __ mv(t1, Bytecodes::_invokestatic); ++ __ bne(t1, t0, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument,or NULL. ++ ++ __ ld(c_rarg0, Address(xlocals, 0)); ++ __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null),c_rarg0, xmethod, xbcp); ++ ++ __ beqz(x10, L_done); ++ ++ __ sd(x10, Address(esp, 0)); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ // Restore machine SP ++ __ ld(t0, Address(xmethod, Method::const_offset())); ++ __ lhu(t0, Address(t0, ConstMethod::max_stack_offset())); ++ __ add(t0, t0, frame::interpreter_frame_monitor_size() + 4); ++ __ ld(t1, Address(fp, frame::interpreter_frame_initial_sp_offset * wordSize)); ++ __ slliw(t0, t0, 3); ++ __ sub(t0, t1, t0); ++ __ andi(sp, t0, -16); ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop_ptr(x10); ++ __ sd(x10, Address(xthread, JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, false, true, false); ++ // restore exception ++ __ get_vm_result(x10, xthread); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects the ++ // following registers set up: ++ // ++ // x10: exception ++ // ra: return address/pc that threw exception ++ // sp: expression stack of caller ++ // fp: fp of caller ++ // FIXME: There's no point saving RA here because VM calls don't trash it ++ __ sub(sp, sp, 2 * wordSize); ++ __ sd(x10, Address(sp, 0)); // save exception ++ __ sd(ra, Address(sp, wordSize)); // save return address ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ xthread, ra); ++ __ mv(x11, x10); // save exception handler ++ __ ld(x10, Address(sp, 0)); // restore exception ++ __ ld(ra, Address(sp, wordSize)); // restore return address ++ __ add(sp, sp, 2 * wordSize); ++ // We might be returning to a deopt handler that expects x13 to ++ // contain the exception pc ++ __ mv(x13, ra); ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(x11); // jump to exception ++ // handler of caller ++} ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ load_earlyret_value(state); ++ ++ __ ld(t0, Address(xthread, JavaThread::jvmti_thread_state_offset())); ++ Address cond_addr(t0, JvmtiThreadState::earlyret_state_offset()); ++ ++ // Clear the earlyret state ++ assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); ++ __ sd(zr, cond_addr); ++ ++ __ remove_activation(state, ++ false, /* throw_monitor_exception */ ++ false, /* install_monitor_exception */ ++ true); /* notify_jvmdi */ ++ __ ret(); ++ ++ return entry; ++} ++// end of 
ForceEarlyReturn support ++ ++//----------------------------------------------------------------------------- ++// Helper for vtos entry point generation ++ ++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, ++ address& bep, ++ address& cep, ++ address& sep, ++ address& aep, ++ address& iep, ++ address& lep, ++ address& fep, ++ address& dep, ++ address& vep) { ++ assert(t != NULL && t->is_valid() && t->tos_in() == vtos, "illegal template"); ++ Label L; ++ aep = __ pc(); __ push_ptr(); __ j(L); ++ fep = __ pc(); __ push_f(); __ j(L); ++ dep = __ pc(); __ push_d(); __ j(L); ++ lep = __ pc(); __ push_l(); __ j(L); ++ bep = cep = sep = ++ iep = __ pc(); __ push_i(); ++ vep = __ pc(); ++ __ bind(L); ++ generate_and_dispatch(t); ++} ++ ++//----------------------------------------------------------------------------- ++ ++// Non-product code ++#ifndef PRODUCT ++address TemplateInterpreterGenerator::generate_trace_code(TosState state) { ++ address entry = __ pc(); ++ ++ __ push_reg(ra); ++ __ push(state); ++ __ push_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ mv(c_rarg2, x10); // Pass itos ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), c_rarg1, c_rarg2, c_rarg3); ++ __ pop_reg(RegSet::range(x10, x17) + RegSet::range(x5, x7) + RegSet::range(x28, x31), sp); ++ __ pop(state); ++ __ pop_reg(ra); ++ __ ret(); // return from result handler ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ push_reg(t0); ++ __ push_reg(x10); ++ __ mv(x10, (address) &BytecodeCounter::_counter_value); ++ __ mv(t0, 1); ++ __ amoadd_d(zr, x10, t0, Assembler::aqrl); ++ __ pop_reg(x10); ++ __ pop_reg(t0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); ++ __ jal(Interpreter::trace_code(t->tos_in())); ++ __ reinit_heapbase(); ++} ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ push_reg(t0); ++ __ mv(t0, (address) &BytecodeCounter::_counter_value); ++ __ ld(t0, Address(t0)); ++ __ mv(t1, StopInterpreterAt); ++ __ bne(t0, t1, L); ++ __ ebreak(); ++ __ bind(L); ++ __ pop_reg(t0); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.cpp b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +new file mode 100644 +index 000000000..8e6e7dee5 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.cpp +@@ -0,0 +1,4028 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++ ++#define __ _masm-> ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No riscv specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(xlocals, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address iaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, 0); ++} ++ ++static inline Address laddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ _masm->shadd(temp, r, xlocals, temp, 3); ++ return Address(temp, Interpreter::local_offset_in_bytes(1));; ++} ++ ++static inline Address faddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} ++ ++static inline Address daddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return laddress(r, temp, _masm); ++} ++ ++static inline Address aaddress(Register r, Register temp, InterpreterMacroAssembler* _masm) { ++ return iaddress(r, temp, _masm); ++} ++ ++// At top of Java expression stack which may be different than esp(). It ++// isn't for category 1 objects. 
++static inline Address at_tos () { ++ return Address(esp, Interpreter::expr_offset_in_bytes(0)); ++} ++ ++static inline Address at_tos_p1() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++static inline Address at_tos_p4() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(4)); ++} ++ ++static inline Address at_tos_p5() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(5)); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the Address described by obj. ++// If val == noreg this means store a NULL ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators) { ++ assert(val == noreg || val == x10, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, x29, x11, x13, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators) { ++ __ load_heap_oop(dst, src, x7, x11, decorators); ++} ++ ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(xbcp, offset); ++} ++ ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) ++{ ++ if (!RewriteBytecodes) { return; } ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: // fall through ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_dputfield: // fall through ++ case Bytecodes::_fast_fputfield: // fall through ++ case Bytecodes::_fast_iputfield: // fall through ++ case Bytecodes::_fast_lputfield: // fall through ++ case Bytecodes::_fast_sputfield: { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); ++ __ mv(bc_reg, bc); ++ __ beqz(temp_reg, L_patch_done); ++ break; ++ } ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. ++ if (load_bc_into_bc_reg) { ++ __ mv(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ addi(temp_reg, temp_reg, -Bytecodes::_breakpoint); // temp_reg is temporary register. 
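++    // (If temp_reg is now zero the current bytecode is _breakpoint, so we
++    // must go through InterpreterRuntime::set_original_bytecode_at below
++    // instead of patching the bytecode stream directly.)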
++ __ bnez(temp_reg, L_fast_patch); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), xmethod, xbcp, bc_reg); ++ __ j(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ beq(temp_reg, bc_reg, L_okay); ++ __ addi(temp_reg, temp_reg, -(int) Bytecodes::java_code(bc)); ++ __ beqz(temp_reg, L_okay); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("should not reach here bytecode"); ++} ++ ++void TemplateTable::aconst_null() ++{ ++ transition(vtos, atos); ++ __ mv(x10, zr); ++} ++ ++void TemplateTable::iconst(int value) ++{ ++ transition(vtos, itos); ++ __ mv(x10, value); ++} ++ ++void TemplateTable::lconst(int value) ++{ ++ transition(vtos, ltos); ++ __ mv(x10, value); ++} ++ ++void TemplateTable::fconst(int value) ++{ ++ transition(vtos, ftos); ++ static float fBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)fBuf); ++ switch (value) { ++ case 0: ++ __ fmv_w_x(f10, zr); ++ break; ++ case 1: ++ __ flw(f10, t0, 0); ++ break; ++ case 2: ++ __ flw(f10, t0, sizeof(float)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::dconst(int value) ++{ ++ transition(vtos, dtos); ++ static double dBuf[2] = {1.0, 2.0}; ++ __ mv(t0, (intptr_t)dBuf); ++ switch (value) { ++ case 0: ++ __ fmv_d_x(f10, zr); ++ break; ++ case 1: ++ __ fld(f10, t0, 0); ++ break; ++ case 2: ++ __ fld(f10, t0, sizeof(double)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() ++{ ++ transition(vtos, itos); ++ __ load_signed_byte(x10, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() ++{ ++ transition(vtos, itos); ++ __ load_unsigned_short(x10, at_bcp(1)); ++ __ revb_w_w(x10, x10); ++ __ sraiw(x10, x10, 16); ++} ++ ++void TemplateTable::ldc(bool wide) ++{ ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(x11, 1); ++ } else { ++ __ load_unsigned_byte(x11, at_bcp(1)); ++ } ++ __ get_cpool_and_tags(x12, x10); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ addi(x13, x11, tags_offset); ++ __ add(x13, x10, x13); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x13, Address(x13, 0)); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ ++ // unresolved class - get the resolved class ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClass); ++ __ beq(x13, t1, call_ldc); ++ ++ // unresolved class in error state - call into runtime to throw the error ++ // from the first resolution attempt ++ __ mv(t1, (u1)JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(x13, t1, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ mv(t1, (u1)JVM_CONSTANT_Class); ++ __ bne(x13, t1, notClass); ++ ++ __ bind(call_ldc); ++ __ mv(c_rarg1, wide); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); ++ __ push_ptr(x10); ++ __ verify_oop(x10); ++ __ j(Done); ++ ++ __ bind(notClass); ++ __ mv(t1, (u1)JVM_CONSTANT_Float); ++ __ bne(x13, t1, notFloat); ++ ++ // 
ftos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ flw(f10, Address(x11, base_offset)); ++ __ push_f(f10); ++ __ j(Done); ++ ++ __ bind(notFloat); ++ ++ __ mv(t1, (u1)JVM_CONSTANT_Integer); ++ __ bne(x13, t1, notInt); ++ ++ // itos ++ __ shadd(x11, x11, x12, x11, 3); ++ __ lw(x10, Address(x11, base_offset)); ++ __ push_i(x10); ++ __ j(Done); ++ ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) ++{ ++ transition(vtos, atos); ++ ++ const Register result = x10; ++ const Register tmp = x11; ++ const Register rarg = x12; ++ ++ const int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ bnez(result, resolved); ++ ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ ++ // first time invocation - must resolve first ++ __ mv(rarg, (int)bytecode()); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. ++ Label notNull; ++ ++ // Stash null_sentinel address to get its value later ++ int32_t offset = 0; ++ __ movptr_with_offset(rarg, Universe::the_null_sentinel_addr(), offset); ++ __ ld(tmp, Address(rarg, offset)); ++ __ bne(result, tmp, notNull); ++ __ mv(result, zr); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ // Safe to call with 0 result ++ __ verify_oop(result); ++ } ++} ++ ++void TemplateTable::ldc2_w() ++{ ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ __ get_unsigned_2_byte_index_at_bcp(x10, 1); ++ ++ __ get_cpool_and_tags(x11, x12); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ add(x12, x12, x10); ++ __ load_unsigned_byte(x12, Address(x12, tags_offset)); ++ __ mv(t1, JVM_CONSTANT_Double); ++ __ bne(x12, t1, notDouble); ++ ++ // dtos ++ __ shadd(x12, x10, x11, x12, 3); ++ __ fld(f10, Address(x12, base_offset)); ++ __ push_d(f10); ++ __ j(Done); ++ ++ __ bind(notDouble); ++ __ mv(t1, (int)JVM_CONSTANT_Long); ++ __ bne(x12, t1, notLong); ++ ++ // ltos ++ __ shadd(x10, x10, x11, x10, 3); ++ __ ld(x10, Address(x10, base_offset)); ++ __ push_l(x10); ++ __ j(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ __ bind(Done); ++ ++} ++ ++void TemplateTable::condy_helper(Label& Done) ++{ ++ const Register obj = x10; ++ const Register rarg = x11; ++ const Register flags = x12; ++ const Register off = x13; ++ ++ const address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ ++ __ mv(rarg, (int) bytecode()); ++ __ call_VM(obj, entry, rarg); ++ ++ __ get_vm_result_2(flags, xthread); ++ ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ mv(off, flags); ++ __ mv(t0, ConstantPoolCacheEntry::field_index_mask); ++ __ andrw(off, off, t0); ++ ++ __ add(off, obj, off); ++ const Address field(off, 0); // base + R---->base + offset ++ ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - 
ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> flags:0~3 ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: // fall through ++ case Bytecodes::_ldc_w: { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ mv(t1, itos); ++ __ bne(flags, t1, notInt); ++ // itos ++ __ lw(x10, field); ++ __ push(itos); ++ __ j(Done); ++ ++ __ bind(notInt); ++ __ mv(t1, ftos); ++ __ bne(flags, t1, notFloat); ++ // ftos ++ __ load_float(field); ++ __ push(ftos); ++ __ j(Done); ++ ++ __ bind(notFloat); ++ __ mv(t1, stos); ++ __ bne(flags, t1, notShort); ++ // stos ++ __ load_signed_short(x10, field); ++ __ push(stos); ++ __ j(Done); ++ ++ __ bind(notShort); ++ __ mv(t1, btos); ++ __ bne(flags, t1, notByte); ++ // btos ++ __ load_signed_byte(x10, field); ++ __ push(btos); ++ __ j(Done); ++ ++ __ bind(notByte); ++ __ mv(t1, ctos); ++ __ bne(flags, t1, notChar); ++ // ctos ++ __ load_unsigned_short(x10, field); ++ __ push(ctos); ++ __ j(Done); ++ ++ __ bind(notChar); ++ __ mv(t1, ztos); ++ __ bne(flags, t1, notBool); ++ // ztos ++ __ load_signed_byte(x10, field); ++ __ push(ztos); ++ __ j(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: { ++ Label notLong, notDouble; ++ __ mv(t1, ltos); ++ __ bne(flags, t1, notLong); ++ // ltos ++ __ ld(x10, field); ++ __ push(ltos); ++ __ j(Done); ++ ++ __ bind(notLong); ++ __ mv(t1, dtos); ++ __ bne(flags, t1, notDouble); ++ // dtos ++ __ load_double(field); ++ __ push(dtos); ++ __ j(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++void TemplateTable::locals_index(Register reg, int offset) ++{ ++ __ lbu(reg, at_bcp(offset)); ++ __ neg(reg, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; ++ ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. 
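++    // In other words, the checks below pick the rewrite as follows:
++    //   next == _iload      -> no rewrite yet (the trailing iload of the
++    //                          pair is rewritten first)
++    //   next == _fast_iload -> rewrite this iload to _fast_iload2
++    //   next == _caload     -> rewrite this iload to _fast_icaload
++    //   otherwise           -> rewrite this iload to _fast_iload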
++ __ mv(t1, Bytecodes::_iload); ++ __ beq(x11, t1, done); ++ ++ // if _fast_iload rewrite to _fast_iload2 ++ __ mv(t1, Bytecodes::_fast_iload); ++ __ mv(bc, Bytecodes::_fast_iload2); ++ __ beq(x11, t1, rewrite); ++ ++ // if _caload rewrite to _fast_icaload ++ __ mv(t1, Bytecodes::_caload); ++ __ mv(bc, Bytecodes::_fast_icaload); ++ __ beq(x11, t1, rewrite); ++ ++ // else rewrite to _fast_iload ++ __ mv(bc, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, bc, x11, false); ++ __ bind(done); ++ ++ } ++ ++ // do iload, get the local value into tos ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} ++ ++void TemplateTable::fast_iload2() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++ __ push(itos); ++ locals_index(x11, 3); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} ++ ++void TemplateTable::fast_iload() ++{ ++ transition(vtos, itos); ++ locals_index(x11); ++ __ lw(x10, iaddress(x11, x10, _masm)); ++} ++ ++void TemplateTable::lload() ++{ ++ transition(vtos, ltos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} ++ ++void TemplateTable::fload() ++{ ++ transition(vtos, ftos); ++ locals_index(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::dload() ++{ ++ transition(vtos, dtos); ++ __ lbu(x11, at_bcp(1)); ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} ++ ++void TemplateTable::aload() ++{ ++ transition(vtos, atos); ++ locals_index(x11); ++ __ ld(x10, iaddress(x11, x10, _masm)); ++ ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ lhu(reg, at_bcp(2)); ++ __ revb_h_h_u(reg, reg); // reverse bytes in half-word and zero-extend ++ __ neg(reg, reg); ++} ++ ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(x11); ++ __ lw(x10, iaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_lload() ++{ ++ transition(vtos, ltos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ ld(x10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} ++ ++void TemplateTable::wide_fload() ++{ ++ transition(vtos, ftos); ++ locals_index_wide(x11); ++ __ flw(f10, faddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_dload() ++{ ++ transition(vtos, dtos); ++ __ lhu(x11, at_bcp(2)); ++ __ revb_h_h_u(x11, x11); // reverse bytes in half-word and zero-extend ++ __ slli(x11, x11, LogBytesPerWord); ++ __ sub(x11, xlocals, x11); ++ __ fld(f10, Address(x11, Interpreter::local_offset_in_bytes(1))); ++} ++ ++void TemplateTable::wide_aload() ++{ ++ transition(vtos, atos); ++ locals_index_wide(x11); ++ __ ld(x10, aaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::index_check(Register array, Register index) ++{ ++ // destroys x11, t0 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ // sign extend index for use by indexed load ++ // check index ++ const Register length = t0; ++ __ lwu(length, Address(array, arrayOopDesc::length_offset_in_bytes())); ++ if (index != x11) { ++ assert(x11 != array, "different registers"); ++ __ mv(x11, index); ++ } ++ Label ok; ++ __ addw(index, index, zr); ++ __ bltu(index, length, ok); ++ __ mv(x13, 
array); ++ __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ jr(t0); ++ __ bind(ok); ++} ++ ++void TemplateTable::iaload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(t0, x11, x10, t0, 2); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++} ++ ++void TemplateTable::laload() ++{ ++ transition(itos, ltos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(t0, x11, x10, t0, 3); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::faload() ++{ ++ transition(itos, ftos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(t0, x11, x10, t0, 2); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::daload() ++{ ++ transition(itos, dtos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(t0, x11, x10, t0, 3); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::aaload() ++{ ++ transition(itos, atos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(t0, x11, x10, t0, LogBytesPerHeapOop); ++ do_oop_load(_masm, ++ Address(t0), ++ x10, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ __ shadd(t0, x11, x10, t0, 0); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::caload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(t0, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++void TemplateTable::fast_icaload() ++{ ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(x12); ++ __ lw(x11, iaddress(x12, x11, _masm)); ++ __ pop_ptr(x10); ++ ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); // addi, max imm is 2^11 ++ __ shadd(t0, x11, x10, t0, 1); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::saload() ++{ ++ transition(itos, itos); ++ __ mv(x11, x10); ++ __ pop_ptr(x10); ++ // x10: array ++ // x11: index ++ index_check(x10, x11); // leaves index in x11, 
kills t0 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_SHORT) >> 1); ++ __ shadd(t0, x11, x10, t0, 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, x10, Address(t0), noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) ++{ ++ transition(vtos, itos); ++ __ lw(x10, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) ++{ ++ transition(vtos, ltos); ++ __ ld(x10, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) ++{ ++ transition(vtos, ftos); ++ __ flw(f10, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) ++{ ++ transition(vtos, dtos); ++ __ fld(f10, daddress(n)); ++} ++ ++void TemplateTable::aload(int n) ++{ ++ transition(vtos, atos); ++ __ ld(x10, iaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = x14; ++ ++ // get next bytecode ++ __ load_unsigned_byte(x11, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // if _getfield then wait with rewrite ++ __ mv(t1, Bytecodes::Bytecodes::_getfield); ++ __ beq(x11, t1, done); ++ ++ // if _igetfield then rewrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_igetfield); ++ __ mv(bc, Bytecodes::_fast_iaccess_0); ++ __ beq(x11, t1, rewrite); ++ ++ // if _agetfield then rewrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_agetfield); ++ __ mv(bc, Bytecodes::_fast_aaccess_0); ++ __ beq(x11, t1, rewrite); ++ ++ // if _fgetfield then rewrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(t1, Bytecodes::_fast_fgetfield); ++ __ mv(bc, Bytecodes::_fast_faccess_0); ++ __ beq(x11, t1, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ mv(bc, Bytecodes::Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ // bc: new bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, bc, x11, false); ++ ++ __ bind(done); ++ } ++ ++ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
++ aload(0); ++} ++ ++void TemplateTable::istore() ++{ ++ transition(itos, vtos); ++ locals_index(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::lstore() ++{ ++ transition(ltos, vtos); ++ locals_index(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(x11); ++ __ fsw(f10, iaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::astore() ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(); ++ locals_index_wide(x11); ++ __ sw(x10, iaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(); ++ locals_index_wide(x11); ++ __ sd(x10, laddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_fstore() { ++ transition(vtos, vtos); ++ __ pop_f(); ++ locals_index_wide(x11); ++ __ fsw(f10, faddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_dstore() { ++ transition(vtos, vtos); ++ __ pop_d(); ++ locals_index_wide(x11); ++ __ fsd(f10, daddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ locals_index_wide(x11); ++ __ sd(x10, aaddress(x11, t0, _masm)); ++} ++ ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_INT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++} ++ ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_LONG) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++} ++ ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_FLOAT) >> 2); ++ __ shadd(t0, x11, x13, t0, 2); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* ftos */, noreg, noreg, noreg); ++} ++ ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // f10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) >> 3); ++ __ shadd(t0, x11, x13, t0, 3); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(t0, 0), noreg /* dtos */, noreg, noreg, noreg); ++} ++ ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(x10, at_tos()); // value ++ __ ld(x12, at_tos_p1()); // index ++ __ ld(x13, at_tos_p2()); // array ++ ++ index_check(x13, x12); // kills x11 ++ __ add(x14, x12, arrayOopDesc::base_offset_in_bytes(T_OBJECT) >> LogBytesPerHeapOop); ++ __ shadd(x14, x14, x13, x14, LogBytesPerHeapOop); ++ 
++ Address element_address(x14, 0); ++ ++ // do array store check - check for NULL value first ++ __ beqz(x10, is_null); ++ ++ // Move subklass into x11 ++ __ load_klass(x11, x10); ++ // Move superklass into x10 ++ __ load_klass(x10, x13); ++ __ ld(x10, Address(x10, ++ ObjArrayKlass::element_klass_offset())); ++ // Compress array + index * oopSize + 12 into a single register. Frees x12. ++ ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x11. ++ __ gen_subtype_check(x11, ok_is_subtype); //todo ++ ++ // Come here on failure ++ // object is at TOS ++ __ j(Interpreter::_throw_ArrayStoreException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Get the value we will store ++ __ ld(x10, at_tos()); ++ // Now store using the appropriate barrier ++ do_oop_store(_masm, element_address, x10, IS_ARRAY); ++ __ j(done); ++ ++ // Have a NULL in x10, x13=array, x12=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ ++ // Store a NULL ++ do_oop_store(_masm, element_address, noreg, IS_ARRAY); ++ ++ // Pop stack arguments ++ __ bind(done); ++ __ add(esp, esp, 3 * Interpreter::stackElementSize); ++ ++} ++ ++void TemplateTable::bastore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(x12, x13); ++ __ lwu(x12, Address(x12, Klass::layout_helper_offset())); ++ Label L_skip; ++ __ andi(t0, x12, Klass::layout_helper_boolean_diffbit()); ++ __ beqz(t0, L_skip); ++ __ andi(x10, x10, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1 ++ __ bind(L_skip); ++ ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_BYTE) >> 0); ++ ++ __ add(x11, x13, x11); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(x11, 0), x10, noreg, noreg, noreg); ++} ++ ++void TemplateTable::castore() ++{ ++ transition(itos, vtos); ++ __ pop_i(x11); ++ __ pop_ptr(x13); ++ // x10: value ++ // x11: index ++ // x13: array ++ index_check(x13, x11); // prefer index in x11 ++ __ add(x11, x11, arrayOopDesc::base_offset_in_bytes(T_CHAR) >> 1); ++ __ shadd(t0, x11, x13, t0, 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(t0, 0), x10, noreg, noreg, noreg); ++} ++ ++void TemplateTable::sastore() ++{ ++ castore(); ++} ++ ++void TemplateTable::istore(int n) ++{ ++ transition(itos, vtos); ++ __ sd(x10, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) ++{ ++ transition(ltos, vtos); ++ __ sd(x10, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) ++{ ++ transition(ftos, vtos); ++ __ fsw(f10, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) ++{ ++ transition(dtos, vtos); ++ __ fsd(f10, daddress(n)); ++} ++ ++void TemplateTable::astore(int n) ++{ ++ transition(vtos, vtos); ++ __ pop_ptr(x10); ++ __ sd(x10, iaddress(n)); ++} ++ ++void TemplateTable::pop() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() ++{ ++ transition(vtos, vtos); ++ __ addi(esp, esp, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() ++{ ++ transition(vtos, vtos); ++ __ ld(x10, Address(esp, 0)); ++ __ push_reg(x10); ++ // stack: ..., a, a ++} ++ ++void TemplateTable::dup_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos()); // load b ++ __ ld(x12, at_tos_p1()); // load a ++ __ sd(x10, 
at_tos_p1()); // store b ++ __ sd(x12, at_tos()); // store a ++ __ push_reg(x10); // push b ++ // stack: ..., b, a, b ++} ++ ++void TemplateTable::dup_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x10, at_tos()); // load c ++ __ ld(x12, at_tos_p2()); // load a ++ __ sd(x10, at_tos_p2()); // store c in a ++ __ push_reg(x10); // push c ++ // stack: ..., c, b, c, c ++ __ ld(x10, at_tos_p2()); // load b ++ __ sd(x12, at_tos_p2()); // store a in b ++ // stack: ..., c, a, c, c ++ __ sd(x10, at_tos_p1()); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++void TemplateTable::dup2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x10, at_tos_p1()); // load a ++ __ push_reg(x10); // push a ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ // stack: ..., a, b, a, b ++} ++ ++void TemplateTable::dup2_x1() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ ld(x12, at_tos()); // load c ++ __ ld(x10, at_tos_p1()); // load b ++ __ push_reg(x10); // push b ++ __ push_reg(x12); // push c ++ // stack: ..., a, b, c, b, c ++ __ sd(x12, at_tos_p3()); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ ld(x12, at_tos_p4()); // load a ++ __ sd(x12, at_tos_p2()); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ sd(x10, at_tos_p4()); // store b in a ++ // stack: ..., b, c, a, b, c ++} ++ ++void TemplateTable::dup2_x2() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ __ ld(x12, at_tos()); // load d ++ __ ld(x10, at_tos_p1()); // load c ++ __ push_reg(x10); // push c ++ __ push_reg(x12); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ ld(x10, at_tos_p4()); // load b ++ __ sd(x10, at_tos_p2()); // store b in d ++ __ sd(x12, at_tos_p4()); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ ld(x12, at_tos_p5()); // load a ++ __ ld(x10, at_tos_p3()); // load c ++ __ sd(x12, at_tos_p3()); // store a in c ++ __ sd(x10, at_tos_p5()); // store c in a ++ // stack: ..., c, d, a, b, c, d ++} ++ ++void TemplateTable::swap() ++{ ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ ld(x12, at_tos_p1()); // load a ++ __ ld(x10, at_tos()); // load b ++ __ sd(x12, at_tos()); // store a in b ++ __ sd(x10, at_tos_p1()); // store b in a ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) ++{ ++ transition(itos, itos); ++ // x10 <== x11 op x10 ++ __ pop_i(x11); ++ switch (op) { ++ case add : __ addw(x10, x11, x10); break; ++ case sub : __ subw(x10, x11, x10); break; ++ case mul : __ mulw(x10, x11, x10); break; ++ case _and : __ andrw(x10, x11, x10); break; ++ case _or : __ orrw(x10, x11, x10); break; ++ case _xor : __ xorrw(x10, x11, x10); break; ++ case shl : __ sllw(x10, x11, x10); break; ++ case shr : __ sraw(x10, x11, x10); break; ++ case ushr : __ srlw(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lop2(Operation op) ++{ ++ transition(ltos, ltos); ++ // x10 <== x11 op x10 ++ __ pop_l(x11); ++ switch (op) { ++ case add : __ add(x10, x11, x10); break; ++ case sub : __ sub(x10, x11, x10); break; ++ case mul : __ mul(x10, x11, x10); break; ++ case _and : __ andr(x10, x11, x10); break; ++ case _or : __ orr(x10, x11, x10); break; ++ case _xor : __ xorr(x10, x11, x10); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::idiv() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ 
pop_i(x11); ++ // x10 <== x11 idiv x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ false); ++} ++ ++void TemplateTable::irem() ++{ ++ transition(itos, itos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_i(x11); ++ // x10 <== x11 irem x10 ++ __ corrected_idivl(x10, x11, x10, /* want_remainder */ true); ++} ++ ++void TemplateTable::lmul() ++{ ++ transition(ltos, ltos); ++ __ pop_l(x11); ++ __ mul(x10, x10, x11); ++} ++ ++void TemplateTable::ldiv() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 ldiv x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ false); ++} ++ ++void TemplateTable::lrem() ++{ ++ transition(ltos, ltos); ++ // explicitly check for div0 ++ Label no_div0; ++ __ bnez(x10, no_div0); ++ __ mv(t0, Interpreter::_throw_ArithmeticException_entry); ++ __ jr(t0); ++ __ bind(no_div0); ++ __ pop_l(x11); ++ // x10 <== x11 lrem x10 ++ __ corrected_idivq(x10, x11, x10, /* want_remainder */ true); ++} ++ ++void TemplateTable::lshl() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sll(x10, x11, x10); ++} ++ ++void TemplateTable::lshr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ sra(x10, x11, x10); ++} ++ ++void TemplateTable::lushr() ++{ ++ transition(itos, ltos); ++ // shift count is in x10 ++ __ pop_l(x11); ++ __ srl(x10, x11, x10); ++} ++ ++void TemplateTable::fop2(Operation op) ++{ ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ pop_f(f11); ++ __ fadd_s(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_f(f11); ++ __ fsub_s(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_f(f11); ++ __ fmul_s(f10, f11, f10); ++ break; ++ case div: ++ __ pop_f(f11); ++ __ fdiv_s(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_s(f11, f10); ++ __ pop_f(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::dop2(Operation op) ++{ ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ pop_d(f11); ++ __ fadd_d(f10, f11, f10); ++ break; ++ case sub: ++ __ pop_d(f11); ++ __ fsub_d(f10, f11, f10); ++ break; ++ case mul: ++ __ pop_d(f11); ++ __ fmul_d(f10, f11, f10); ++ break; ++ case div: ++ __ pop_d(f11); ++ __ fdiv_d(f10, f11, f10); ++ break; ++ case rem: ++ __ fmv_d(f11, f10); ++ __ pop_d(f10); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::ineg() ++{ ++ transition(itos, itos); ++ __ negw(x10, x10); ++} ++ ++void TemplateTable::lneg() ++{ ++ transition(ltos, ltos); ++ __ neg(x10, x10); ++} ++ ++void TemplateTable::fneg() ++{ ++ transition(ftos, ftos); ++ __ fneg_s(f10, f10); ++} ++ ++void TemplateTable::dneg() ++{ ++ transition(dtos, dtos); ++ __ fneg_d(f10, f10); ++} ++ ++void TemplateTable::iinc() ++{ ++ transition(vtos, vtos); ++ __ load_signed_byte(x11, at_bcp(2)); // get constant ++ locals_index(x12); ++ __ ld(x10, iaddress(x12, x10, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} ++ ++void TemplateTable::wide_iinc() ++{ ++ transition(vtos, vtos); ++ __ lwu(x11, at_bcp(2)); // get constant and index ++ __ revb_h_w_u(x11, x11); // 
reverse bytes in half-word (32bit) and zero-extend ++ __ zero_extend(x12, x11, 16); ++ __ neg(x12, x12); ++ __ slli(x11, x11, 32); ++ __ srai(x11, x11, 48); ++ __ ld(x10, iaddress(x12, t0, _masm)); ++ __ addw(x10, x10, x11); ++ __ sd(x10, iaddress(x12, t0, _masm)); ++} ++ ++void TemplateTable::convert() ++{ ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sign_extend(x10, x10, 32); ++ break; ++ case Bytecodes::_i2f: ++ __ fcvt_s_w(f10, x10); ++ break; ++ case Bytecodes::_i2d: ++ __ fcvt_d_w(f10, x10); ++ break; ++ case Bytecodes::_i2b: ++ __ sign_extend(x10, x10, 8); ++ break; ++ case Bytecodes::_i2c: ++ __ zero_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_i2s: ++ __ sign_extend(x10, x10, 16); ++ break; ++ case Bytecodes::_l2i: ++ __ addw(x10, x10, zr); ++ break; ++ case Bytecodes::_l2f: ++ __ fcvt_s_l(f10, x10); ++ break; ++ case Bytecodes::_l2d: ++ __ fcvt_d_l(f10, x10); ++ break; ++ case Bytecodes::_f2i: ++ __ fcvt_w_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2l: ++ __ fcvt_l_s_safe(x10, f10); ++ break; ++ case Bytecodes::_f2d: ++ __ fcvt_d_s(f10, f10); ++ break; ++ case Bytecodes::_d2i: ++ __ fcvt_w_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2l: ++ __ fcvt_l_d_safe(x10, f10); ++ break; ++ case Bytecodes::_d2f: ++ __ fcvt_s_d(f10, f10); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() ++{ ++ transition(ltos, itos); ++ __ pop_l(x11); ++ __ cmp_l2i(t0, x11, x10); ++ __ mv(x10, t0); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) ++{ ++ // For instruction feq, flt and fle, the result is 0 if either operand is NaN ++ if (is_float) { ++ __ pop_f(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. 
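++    // (Added note: the shared template table generates fcmpl/dcmpl with
++    // unordered_result == -1 and fcmpg/dcmpg with unordered_result == 1, so
++    // a NaN operand yields -1 and 1 respectively.)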
++ // f11 primary, f10 secondary ++ __ float_compare(x10, f11, f10, unordered_result); ++ } else { ++ __ pop_d(f11); ++ // if unordered_result < 0: ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ // else: ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ // f11 primary, f10 secondary ++ __ double_compare(x10, f11, f10, unordered_result); ++ } ++} ++ ++void TemplateTable::branch(bool is_jsr, bool is_wide) ++{ ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); ++ ++ __ profile_taken_branch(x10, x11); ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // load branch displacement ++ if (!is_wide) { ++ __ lhu(x12, at_bcp(1)); ++ __ revb_h_h(x12, x12); // reverse bytes in half-word and sign-extend ++ } else { ++ __ lwu(x12, at_bcp(1)); ++ __ revb_w_w(x12, x12); // reverse bytes in word and sign-extend ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occurring below. ++ ++ if (is_jsr) { ++ // compute return address as bci ++ __ ld(t1, Address(xmethod, Method::const_offset())); ++ __ add(t1, t1, ++ in_bytes(ConstMethod::codes_offset()) - (is_wide ? 5 : 3)); ++ __ sub(x11, xbcp, t1); ++ __ push_i(x11); ++ // Adjust the bcp by the 16-bit displacement in x12 ++ __ add(xbcp, xbcp, x12); ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ // load the next target bytecode into t0, it is the argument of dispatch_only ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp by the displacement in x12 ++ __ add(xbcp, xbcp, x12); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, ++ "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // x10: MDO ++ // x11: MDO bumped taken-count ++ // x12: target offset ++ __ bgtz(x12, dispatch); // count only if backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ bnez(t0, has_counters); ++ __ push_reg(x10); ++ __ push_reg(x11); ++ __ push_reg(x12); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), xmethod); ++ __ pop_reg(x12); ++ __ pop_reg(x11); ++ __ pop_reg(x10); ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ __ beqz(t0, dispatch); // No MethodCounters allocated, OutOfMemory ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
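++        // (i.e. has a MethodData* been allocated for this method; if not,
++        // branch to no_mdo and bump the backedge counter in the
++        // MethodCounters* instead)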
++ __ ld(x11, Address(xmethod, in_bytes(Method::method_data_offset()))); ++ __ beqz(x11, no_mdo); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(x11, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(x11, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ x10, t0, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ __ j(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(t0, Address(xmethod, Method::method_counters_offset())); ++ const Address mask(t0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(t0, be_offset), increment, mask, ++ x10, t1, false, ++ UseOnStackReplacement ? &backedge_counter_overflow : &dispatch); ++ } else { // not TieredCompilation ++ // increment counter ++ __ ld(t1, Address(xmethod, Method::method_counters_offset())); ++ __ lwu(x10, Address(t1, be_offset)); // load backedge counter ++ __ addw(t0, x10, InvocationCounter::count_increment); // increment counter ++ __ sw(t0, Address(t1, be_offset)); // store counter ++ ++ __ lwu(x10, Address(t1, inv_offset)); // load invocation counter ++ __ andi(x10, x10, (unsigned)InvocationCounter::count_mask_value, x13); // and the status bits ++ __ addw(x10, x10, t0); // add both counters ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ blt(x10, t0, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(x10, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against x11 which is the MDO taken count ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bltu(x11, t0, dispatch); // Intel == Assembler::below, lo:unsigned lower ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(x11, x11, overflow_frequency - 1); ++ __ beqz(x11, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against x10, which is the sum of the ++ // counters ++ __ lwu(t0, Address(t1, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ bgeu(x10, t0, backedge_counter_overflow); // Intel == Assembler::aboveEqual ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into t0 ++ __ load_unsigned_byte(t0, Address(xbcp, 0)); ++ ++ // continue with the bytecode @ target ++ // t0: target bytecode ++ // xbcp: target bcp ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ __ set_method_data_pointer_for_bcp(); ++ __ j(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ neg(x12, x12); ++ __ add(x12, x12, xbcp); // branch xbcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ x12); ++ __ load_unsigned_byte(x11, Address(xbcp, 0)); // restore target bytecode ++ ++ // x10: osr nmethod (osr ok) or NULL (osr not possible) ++ // w11: target bytecode ++ // x12: temporary ++ __ beqz(x10, dispatch); // test result -- no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lbu(x12, Address(x10, nmethod::state_offset())); ++ if (nmethod::in_use != 0) { ++ __ sub(x12, x12, nmethod::in_use); ++ } ++ __ bnez(x12, dispatch); ++ ++ // We have the address of an on stack replacement routine in x10 ++ // We need to prepare to execute the OSR method. First we must ++ // migrate the locals and monitors off of the stack. ++ ++ __ mv(x9, x10); // save the nmethod ++ ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // x10 is OSR buffer, move it to expected parameter location ++ __ mv(j_rarg0, x10); ++ ++ // remove activation ++ // get sender esp ++ __ ld(esp, ++ Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ // remove frame anchor ++ __ leave(); ++ // Ensure compiled code always sees stack at proper alignment ++ __ andi(sp, esp, -16); ++ ++ // and begin the OSR nmethod ++ __ ld(t0, Address(x9, nmethod::osr_entry_point_offset())); ++ __ jr(t0); ++ } ++ } ++ ++} ++ ++void TemplateTable::if_0cmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bnez(x10, not_taken); ++ break; ++ case not_equal: ++ __ beqz(x10, not_taken); ++ break; ++ case less: ++ __ bgez(x10, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(x10, not_taken); ++ break; ++ case greater: ++ __ blez(x10, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(x10, not_taken); ++ break; ++ default: ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} ++ ++void TemplateTable::if_icmp(Condition cc) ++{ ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_i(x11); ++ __ addw(x10, x10, zr); ++ switch (cc) { ++ case equal: ++ __ bne(x11, x10, not_taken); ++ break; ++ case not_equal: ++ __ beq(x11, x10, not_taken); ++ break; ++ case less: ++ __ bge(x11, x10, not_taken); ++ break; ++ case less_equal: ++ __ bgt(x11, x10, not_taken); ++ break; ++ case greater: ++ __ ble(x11, x10, not_taken); ++ break; ++ case greater_equal: ++ __ blt(x11, x10, not_taken); ++ break; ++ default: ++ break; ++ } ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ if (cc == equal) { ++ __ bnez(x10, not_taken); ++ } else { ++ __ beqz(x10, 
not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} ++ ++void TemplateTable::if_acmp(Condition cc) ++{ ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_ptr(x11); ++ ++ if (cc == equal) { ++ __ oop_nequal(x11, x10, not_taken); ++ } else if (cc == not_equal) { ++ __ oop_equal(x11, x10, not_taken); ++ } ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(x10); ++} ++ ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ // We might be moving to a safepoint. The thread which calls ++ // Interpreter::notice_safepoints() will effectively flush its cache ++ // when it makes a system call, but we need to do something to ++ // ensure that we see the changed dispatch table. ++ __ membar(MacroAssembler::LoadLoad); ++ ++ locals_index(x11); ++ __ ld(x11, aaddress(x11, t1, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ addi(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++} ++ ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ locals_index_wide(x11); ++ __ ld(x11, aaddress(x11, t0, _masm)); // get return bci, compute return bcp ++ __ profile_ret(x11, x12); ++ __ ld(xbcp, Address(xmethod, Method::const_offset())); ++ __ add(xbcp, xbcp, x11); ++ __ add(xbcp, xbcp, in_bytes(ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, /*generate_poll*/true); ++} ++ ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ // align xbcp ++ __ la(x11, at_bcp(BytesPerInt)); ++ __ andi(x11, x11, -BytesPerInt); ++ // load lo & hi ++ __ lwu(x12, Address(x11, BytesPerInt)); ++ __ lwu(x13, Address(x11, 2 * BytesPerInt)); ++ __ revb_w_w(x12, x12); // reverse bytes in word (32bit) and sign-extend ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ // check against lo & hi ++ __ blt(x10, x12, default_case); ++ __ bgt(x10, x13, default_case); ++ // lookup dispatch offset ++ __ subw(x10, x10, x12); ++ __ shadd(x13, x10, x11, t0, 2); ++ __ lwu(x13, Address(x13, 3 * BytesPerInt)); ++ __ profile_switch_case(x10, x11, x12); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ load_unsigned_byte(t0, Address(xbcp)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x11, 0)); ++ __ j(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ // bswap x10 so we can avoid bswapping the table entries ++ __ revb_w_w(x10, x10); // reverse bytes in word (32bit) and sign-extend ++ // align xbcp ++ __ la(x9, at_bcp(BytesPerInt)); // btw: should be able to get rid of ++ // this instruction (change offsets ++ // below) ++ __ andi(x9, x9, -BytesPerInt); ++ // set counter ++ __ lwu(x11, Address(x9, BytesPerInt)); ++ __ revb_w(x11, x11); ++ __ j(loop_entry); ++ // table search ++ __ bind(loop); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lw(t0, Address(t0, 2 * 
BytesPerInt)); ++ __ beq(x10, t0, found); ++ __ bind(loop_entry); ++ __ addi(x11, x11, -1); ++ __ bgez(x11, loop); ++ // default case ++ __ profile_switch_default(x10); ++ __ lwu(x13, Address(x9, 0)); ++ __ j(continue_execution); ++ // entry found -> get offset ++ __ bind(found); ++ __ shadd(t0, x11, x9, t0, 3); ++ __ lwu(x13, Address(t0, 3 * BytesPerInt)); ++ __ profile_switch_case(x11, x10, x9); ++ // continue execution ++ __ bind(continue_execution); ++ __ revb_w_w(x13, x13); // reverse bytes in word (32bit) and sign-extend ++ __ add(xbcp, xbcp, x13); ++ __ lbu(t0, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++} ++ ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) ++ // binary_search start: ++ // #Binary search according to "Methodik des Programmierens" by ++ // # Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i + 1 < j) do ++ // # invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // # with Q: for all i: 0 <= i < n: key < a[i] ++ // # where a stands for the array and assuming that the (inexisting) ++ // # element a[n] is infinitely big. ++ // int h = (i + j) >> 1 ++ // # i < h < j ++ // if (key < array[h].fast_match()) ++ // then [j = h] ++ // else [i = h] ++ // end ++ // # R: a[i] <= key < a[i+1] or Q ++ // # (i.e., if key is within array, i is the correct index) ++ // return i ++ // binary_search end ++ ++ ++ // Register allocation ++ const Register key = x10; // already set (tosca) ++ const Register array = x11; ++ const Register i = x12; ++ const Register j = x13; ++ const Register h = x14; ++ const Register temp = x15; ++ ++ // Find array start ++ __ la(array, at_bcp(3 * BytesPerInt)); // btw: should be able to ++ // get rid of this ++ // instruction (change ++ // offsets below) ++ __ andi(array, array, -BytesPerInt); ++ ++ // Initialize i & j ++ __ mv(i, zr); // i = 0 ++ __ lwu(j, Address(array, -BytesPerInt)); // j = length(array) ++ ++ // Convert j into native byteordering ++ __ revb_w(j, j); ++ ++ // And start ++ Label entry; ++ __ j(entry); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ __ addw(h, i, j); // h = i + j ++ __ srliw(h, h, 1); // h = (i + j) >> 1 ++ // if [key < array[h].fast_match()] ++ // then [j = h] ++ // else [i = h] ++ // Convert array[h].match to native byte-ordering before compare ++ __ shadd(temp, h, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++ ++ Label L_done, L_greater; ++ __ bge(key, temp, L_greater); ++ // if [key < array[h].fast_match()] then j = h ++ __ mv(j, h); ++ __ j(L_done); ++ __ bind(L_greater); ++ // if [key >= array[h].fast_match()] then i = h ++ __ mv(i, h); ++ __ bind(L_done); ++ ++ // while [i + 1 < j] ++ __ bind(entry); ++ __ addiw(h, i, 1); // i + 1 ++ __ blt(h, j, loop); // i + 1 < j ++ } ++ ++ // end of binary search, result index is i (must check again!) 
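++  // (Each LookupswitchPair is a match word followed by an offset word, i.e.
++  // 2 * BytesPerInt == 8 bytes, which is why the shadd calls in this method
++  // scale the pair index by 3.)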
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ shadd(temp, i, array, temp, 3); ++ __ ld(temp, Address(temp, 0)); ++ __ revb_w_w(temp, temp); // reverse bytes in word (32bit) and sign-extend ++ __ bne(key, temp, default_case); ++ ++ // entry found -> j = offset ++ __ shadd(temp, i, array, temp, 3); ++ __ lwu(j, Address(temp, BytesPerInt)); ++ __ profile_switch_case(i, key, array); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++ ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); ++ ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lwu(j, Address(array, -2 * BytesPerInt)); ++ __ revb_w_w(j, j); // reverse bytes in word (32bit) and sign-extend ++ ++ __ add(temp, xbcp, j); ++ __ load_unsigned_byte(t0, Address(temp, 0)); ++ ++ __ add(xbcp, xbcp, j); ++ __ la(xbcp, Address(xbcp, 0)); ++ __ dispatch_only(vtos, /*generate_poll*/true); ++} ++ ++void TemplateTable::_return(TosState state) ++{ ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ ++ __ ld(c_rarg1, aaddress(0)); ++ __ load_klass(x13, c_rarg1); ++ __ lwu(x13, Address(x13, Klass::access_flags_offset())); ++ Label skip_register_finalizer; ++ __ andi(t0, x13, JVM_ACC_HAS_FINALIZER); ++ __ beqz(t0, skip_register_finalizer); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); ++ ++ __ bind(skip_register_finalizer); ++ } ++ ++ // Issue a StoreStore barrier after all stores but before return ++ // from any constructor for any class with a final field. We don't ++ // know if this is a finalizer, so we always do so. ++ if (_desc->bytecode() == Bytecodes::_return) { ++ __ membar(MacroAssembler::StoreStore); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(x10); ++ } ++ ++ __ remove_activation(state); ++ __ ret(); ++} ++ ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. 
++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++ ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ const Register temp = x9; ++ assert_different_registers(Rcache, index, temp); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ __ mv(t0, (int) code); ++ __ beq(temp, t0, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ __ mv(temp, (int) code); ++ __ call_VM(noreg, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ // n.b. unlike x86 Rcache is now rcpool plus the indexed offset ++ // so all clients ofthis method must be modified accordingly ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++// n.b unlike x86 cache already includes the index offset ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ ld(off, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f2_offset()))); ++ // Flags ++ __ lwu(flags, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, Address(cache, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f1_offset()))); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ __ resolve_oop_handle(obj); ++ } ++} ++ ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = t1; ++ const Register index = x14; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ // determine constant pool cache field offsets ++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); ++ const int method_offset = in_bytes(ConstantPoolCache::base_offset() + ++ (is_invokevirtual ? 
++ ConstantPoolCacheEntry::f2_offset() : ++ ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ const size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ __ ld(method, Address(cache, method_offset)); ++ ++ if (itable_index != noreg) { ++ __ ld(itable_index, Address(cache, index_offset)); ++ } ++ __ lwu(flags, Address(cache, flags_offset)); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here beacause we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ ++ __ beqz(x10, L1); ++ ++ __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); ++ __ la(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); ++ ++ if (is_static) { ++ __ mv(c_rarg1, zr); // NULL object reference ++ } else { ++ __ ld(c_rarg1, at_tos()); // get object pointer without popping it ++ __ verify_oop(c_rarg1); ++ } ++ // c_rarg1: object pointer or NULL ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) ++{ ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
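The f1/f2/flags loads above all index into one resolved constant-pool-cache entry. As a rough illustration of which word each ld/lwu is fetching (the authoritative layout is ConstantPoolCacheEntry in cpCache.hpp; the field names and ordering below are only a sketch):

#include <cstdint>

struct CpCacheEntrySketch {
  intptr_t indices;  // cp index + cached bytecodes, tested by resolve_cache_and_index
  intptr_t f1;       // e.g. resolved Klass*/Method*; source of the mirror on the getstatic path
  intptr_t f2;       // field byte offset, vtable index, or Method*, depending on the bytecode
  intptr_t flags;    // tos state, volatile/final/vfinal bits, parameter size
};

// getfield/putfield below read f2 as the field offset and test single bits of flags:
static bool entry_is_volatile(const CpCacheEntrySketch& e, unsigned is_volatile_shift) {
  return ((uintptr_t)e.flags >> is_volatile_shift) & 1;
}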
++ __ verify_oop(r); ++} ++ ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) ++{ ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x14; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register raw_flags = x16; ++ const Register bc = x14; // uses same reg as obj, so don't mix them ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, raw_flags, is_static); ++ ++ if (!is_static) { ++ // obj is on the stack ++ pop_and_check_object(obj); ++ } ++ ++ if (!UseBarriersForVolatile) { ++ Label notVolatile; ++ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ __ add(off, obj, off); ++ const Address field(off); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ __ slli(flags, raw_flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); ++ ++ // Dont't rewrite getstatic, only getfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ __ push(btos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); ++ ++ // ztos (same code as btos) ++ __ access_load_at(T_BOOLEAN, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ztos); ++ // Rewirte bytecode to be faster ++ if (rc == may_rewrite) { ++ // uses btos rewriting, no truncating to t/f bit is needed for getfield ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); ++ // atos ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ push(atos); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ __ push(itos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ctos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ __ push(stos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ __ push(ltos); ++ // Rewrite bytecode to be faster ++ if 
(rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, bc, x11); ++ } ++ __ j(Done); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(dtos); ++ // Rewrite bytecode to be faster ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, bc, x11); ++ } ++#ifdef ASSERT ++ __ j(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ Label notVolatile; ++ __ andi(t0, raw_flags, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++} ++ ++void TemplateTable::getfield(int byte_no) ++{ ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) ++{ ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, x10); ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(x10, Address(t0, offset)); ++ __ beqz(x10, L1); ++ ++ __ get_cache_and_index_at_bcp(c_rarg2, t0, 1); ++ ++ if (is_static) { ++ // Life is simple. Null out the object pointer. ++ __ mv(c_rarg1, zr); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
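The point of the comment above is that the object reference sits under the value being stored, one or two expression-stack slots down depending on whether the value is a long/double. A small sketch of that slot arithmetic (the enum and 8-byte slot size are illustrative; HotSpot's TosState and the at_tos_p1()/at_tos_p2() helpers used below are the real mechanism):

#include <cstddef>

enum TosStateSketch { btos, ztos, ctos, stos, itos, ltos, ftos, dtos, atos, vtos };

// Byte offset from the expression-stack top to the object slot: one word below
// a one-word value, two words below a two-word (long/double) value.
static size_t object_offset_below_tos(TosStateSketch tos, size_t word_size = 8) {
  const bool two_word = (tos == ltos || tos == dtos);
  return (two_word ? 2 : 1) * word_size;
}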
++ __ lwu(c_rarg3, Address(c_rarg2, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ srli(c_rarg3, c_rarg3, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ Label nope2, done, ok; ++ __ ld(c_rarg1, at_tos_p1()); // initially assume a one word jvalue ++ __ sub(t0, c_rarg3, ltos); ++ __ beqz(t0, ok); ++ __ sub(t0, c_rarg3, dtos); ++ __ bnez(t0, nope2); ++ __ bind(ok); ++ __ ld(c_rarg1, at_tos_p2()); // ltos (two word jvalue); ++ __ bind(nope2); ++ } ++ // cache entry pointer ++ __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); ++ // object (tos) ++ __ mv(c_rarg3, esp); ++ // c_rarg1: object pointer set up above (NULL if static) ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ c_rarg1, c_rarg2, c_rarg3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = x12; ++ const Register index = x13; ++ const Register obj = x12; ++ const Register off = x9; ++ const Register flags = x10; ++ const Register bc = x14; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ __ mv(x15, flags); ++ ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } ++ ++ Label notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ __ slli(flags, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ++ ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(flags, flags, XLEN - ConstantPoolCacheEntry::tos_state_bits); ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ bnez(flags, notByte); ++ ++ // Don't rewrite putstatic, only putfield ++ if (is_static) { ++ rc = may_not_rewrite; ++ } ++ ++ // btos ++ { ++ __ pop(btos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); // off register as temparator register. ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notByte); ++ __ sub(t0, flags, (u1)ztos); ++ __ bnez(t0, notBool); ++ ++ // ztos ++ { ++ __ pop(ztos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notBool); ++ __ sub(t0, flags, (u1)atos); ++ __ bnez(t0, notObj); ++ ++ // atos ++ { ++ __ pop(atos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
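Both getfield_or_static and putfield_or_static extract the tos state from the cache-entry flags with a shift-left/shift-right pair rather than an and-mask. A sketch of that unsigned bit-field extraction (the concrete shift and width in the usage comment are assumptions; the real values are ConstantPoolCacheEntry::tos_state_shift and tos_state_bits):

#include <cstdint>

// Extract the unsigned bit field [shift, shift + bits) from 'flags' the same
// way the slli/srli pair above does: shift left to discard the high bits,
// then logical shift right to discard the low bits.
static uint64_t extract_field(uint64_t flags, unsigned shift, unsigned bits, unsigned xlen = 64) {
  uint64_t v = flags << (xlen - (shift + bits));
  return v >> (xlen - bits);
}

// e.g. the tos state, assuming the usual tos_state_shift of 28 and 4 state bits:
//   uint64_t tos = extract_field(flags, 28, 4);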
++ const Address field(off, 0); ++ // Store into the field ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notObj); ++ __ sub(t0, flags, (u1)itos); ++ __ bnez(t0, notInt); ++ ++ // itos ++ { ++ __ pop(itos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notInt); ++ __ sub(t0, flags, (u1)ctos); ++ __ bnez(t0, notChar); ++ ++ // ctos ++ { ++ __ pop(ctos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notChar); ++ __ sub(t0, flags, (u1)stos); ++ __ bnez(t0, notShort); ++ ++ // stos ++ { ++ __ pop(stos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notShort); ++ __ sub(t0, flags, (u1)ltos); ++ __ bnez(t0, notLong); ++ ++ // ltos ++ { ++ __ pop(ltos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notLong); ++ __ sub(t0, flags, (u1)ftos); ++ __ bnez(t0, notFloat); ++ ++ // ftos ++ { ++ __ pop(ftos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. ++ const Address field(off, 0); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, x11, true, byte_no); ++ } ++ __ j(Done); ++ } ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ sub(t0, flags, (u1)dtos); ++ __ bnez(t0, notDouble); ++#endif ++ ++ // dtos ++ { ++ __ pop(dtos); ++ // field address ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ add(off, obj, off); // if static, obj from cache, else obj from stack. 
++ const Address field(off, 0); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ if (rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, x11, true, byte_no); ++ } ++ } ++ ++#ifdef ASSERT ++ __ j(Done); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ andi(t0, x15, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) ++{ ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++void TemplateTable::jvmti_post_fast_field_mod() ++{ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()), offset); ++ __ lwu(c_rarg3, Address(t0, offset)); ++ __ beqz(c_rarg3, L2); ++ __ pop_ptr(x9); // copy the object pointer from tos ++ __ verify_oop(x9); ++ __ push_ptr(x9); // put the object pointer back on tos ++ // Save tos values before call_VM() clobbers them. Since we have ++ // to do it for every data type, we use the saved values as the ++ // jvalue object. ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ push_d(); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(x10); break; ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ __ mv(c_rarg3, esp); // points to jvalue on the stack ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, x10, 1); ++ __ verify_oop(x9); ++ // x9: object pointer copied above ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ x9, c_rarg2, c_rarg3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(x10); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(x10); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(x10); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++void TemplateTable::fast_storefield(TosState state) ++{ ++ transition(state, vtos); ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); ++ ++ // Must prevent reordering of the following cp cache loads with 
bytecode load ++ __ membar(MacroAssembler::LoadLoad); ++ ++ // test for volatile with x13 ++ __ lwu(x13, Address(x12, in_bytes(base + ++ ConstantPoolCacheEntry::flags_offset()))); ++ ++ // replace index with field offset from cache entry ++ __ ld(x11, Address(x12, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); ++ ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreStore | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(x12); ++ ++ // field address ++ __ add(x11, x12, x11); ++ const Address field(x11, 0); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, field, x10, IN_HEAP); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_zputfield: ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, field, x10, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::StoreLoad | MacroAssembler::StoreStore); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::fast_accessfield(TosState state) ++{ ++ transition(atos, state); ++ // Do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
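fast_storefield above brackets a volatile field write with membar(StoreStore | LoadStore) before the store and membar(StoreLoad | StoreStore) after it, which is the release-then-full-fence shape described in the long memory-model comment earlier in this file. In C++ terms the ordering can be approximated as follows (an illustrative mapping only; the std::atomic fences are at least as strong as the membar combinations, not an exact translation of what HotSpot emits):

#include <atomic>

static void volatile_store_sketch(std::atomic<int>& field, int value) {
  std::atomic_thread_fence(std::memory_order_release);   // ~ StoreStore | LoadStore
  field.store(value, std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // ~ StoreLoad | StoreStore
}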
++ Label L1; ++ int32_t offset = 0; ++ __ la_patchable(t0, ExternalAddress((address)JvmtiExport::get_field_access_count_addr()), offset); ++ __ lwu(x12, Address(t0, offset)); ++ __ beqz(x12, L1); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, t1, 1); ++ __ verify_oop(x10); ++ __ push_ptr(x10); // save object pointer before call_VM() clobbers it ++ __ mv(c_rarg1, x10); ++ // c_rarg1: object pointer copied above ++ // c_rarg2: cache entry pointer ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2); ++ __ pop_ptr(x10); // restore object pointer ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x11, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ membar(MacroAssembler::LoadLoad); ++ ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ ++ // x10: object ++ __ verify_oop(x10); ++ __ null_check(x10); ++ __ add(x11, x10, x11); ++ const Address field(x11, 0); ++ ++ if (!UseBarriersForVolatile) { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, field, x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ access_load_at(T_LONG, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, x10, field, noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, x10, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ { ++ Label notVolatile; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::fast_xaccess(TosState state) ++{ ++ transition(vtos, state); ++ ++ // get receiver ++ __ ld(x10, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(x12, x13, 2); ++ __ ld(x11, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ ++ if (!UseBarriersForVolatile) { ++ Label notVolatile; ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::AnyAny); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ addi(xbcp, xbcp, 1); ++ __ 
null_check(x10); ++ switch (state) { ++ case itos: ++ __ add(x10, x10, x11); ++ __ access_load_at(T_INT, IN_HEAP, x10, Address(x10, 0), noreg, noreg); ++ __ addw(x10, x10, zr); // signed extended ++ break; ++ case atos: ++ __ add(x10, x10, x11); ++ do_oop_load(_masm, Address(x10, 0), x10, IN_HEAP); ++ __ verify_oop(x10); ++ break; ++ case ftos: ++ __ add(t0, x10, x11); ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, Address(t0), noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ lwu(x13, Address(x12, in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_volatile_shift); ++ __ beqz(t0, notVolatile); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ bind(notVolatile); ++ } ++ ++ __ sub(xbcp, xbcp, 1); ++} ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) ++{ ++ __ call_Unimplemented(); ++} ++ ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == x13, ""); ++ assert(recv == noreg || recv == x12, ""); ++ ++ // setup registers & access constant pool cache ++ if (recv == noreg) { ++ recv = x12; ++ } ++ if (flags == noreg) { ++ flags = x13; ++ } ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ // maybe push appendix to arguments (just before return address) ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::has_appendix_shift); ++ __ beqz(t0, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ __ push_reg(x9); ++ __ mv(x9, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, x9); ++ __ pop_reg(x9); ++ __ push_reg(index); // push appendix (MethodType, CallSite, etc.) 
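prepare_invoke and invokevirtual_helper test individual bits of the cache-entry flags (has_appendix here, vfinal and forced-virtual further down) and read the parameter size from the low bits to locate the receiver. A sketch of that decoding and of the vfinal-vs-vtable choice (the *_shift parameters stand in for the ConstantPoolCacheEntry constants used above; MethodSketch/KlassSketch are illustrative, not HotSpot's Method/Klass):

#include <cstdint>

struct MethodSketch;

struct KlassSketch {
  MethodSketch** vtable;                   // what lookup_virtual_method() indexes
};

static bool test_bit(uint64_t flags, unsigned shift) {
  return (flags >> shift) & 1;             // same idea as the andi(t0, flags, 1UL << shift) idiom
}

// The low bits of flags hold the parameter size, i.e. how many slots down the
// expression stack the receiver sits.
static unsigned parameter_size(uint64_t flags, uint64_t parameter_size_mask) {
  return (unsigned)(flags & parameter_size_mask);
}

// vfinal: f2 already holds the Method*; otherwise f2 is a vtable index that is
// resolved against the receiver's klass at call time.
static MethodSketch* select_virtual_target(uint64_t flags, unsigned is_vfinal_shift,
                                           intptr_t f2, const KlassSketch* receiver_klass) {
  if (test_bit(flags, is_vfinal_shift)) {
    return reinterpret_cast<MethodSketch*>(f2);
  }
  return receiver_klass->vtable[f2];
}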
++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (note: no return address pushed yet) ++ if (load_receiver) { ++ __ andi(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); // parameter_size_mask = 1 << 8 ++ __ shadd(t0, recv, esp, t0, 3); ++ __ ld(recv, Address(t0, -Interpreter::expr_offset_in_bytes(1))); ++ __ verify_oop(recv); ++ } ++ ++ // compute return type ++ __ slli(t1, flags, XLEN - (ConstantPoolCacheEntry::tos_state_shift + ConstantPoolCacheEntry::tos_state_bits)); ++ __ srli(t1, t1, XLEN - ConstantPoolCacheEntry::tos_state_bits); // (1 << 5) - 4 --> 28~31==> t1:0~3 ++ ++ // load return address ++ { ++ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ++ __ mv(t0, table_addr); ++ __ shadd(t0, t1, t0, t1, 3); ++ __ ld(ra, Address(t0, 0)); ++ } ++} ++ ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) ++{ ++ // Uses temporary registers x10, x13 ++ assert_different_registers(index, recv, x10, x13); ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ andi(t0, flags, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notFinal); ++ ++ const Register method = index; // method must be xmethod ++ assert(method == xmethod, "methodOop must be xmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* ++ ++ // It's final, need a null check here! ++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, method, x14, true); ++ ++ __ jump_from_interpreted(method); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x10, recv); ++ ++ // profile this call ++ __ profile_virtual_call(x10, xlocals, x13); ++ ++ // get target methodOop & entry point ++ __ lookup_virtual_method(x10, index, method); ++ __ profile_arguments_type(x13, method, x14, true); ++ __ jump_from_interpreted(method); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, xmethod, noreg, x12, x13); ++ ++ // xmethod: index (actually a Method*) ++ // x12: receiver ++ // x13: flags ++ ++ invokevirtual_helper(xmethod, x12, x13); ++} ++ ++void TemplateTable::invokespecial(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, xmethod, noreg, // get f1 Method* ++ x12); // get receiver also for null check ++ __ verify_oop(x12); ++ __ null_check(x12); ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, xbcp, false); ++ __ jump_from_interpreted(xmethod); ++} ++ ++void TemplateTable::invokestatic(int byte_no) ++{ ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this arugment"); ++ ++ prepare_invoke(byte_no, xmethod); // get f1 Method* ++ // do the call ++ __ profile_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, false); ++ __ jump_from_interpreted(xmethod); ++} ++ ++void TemplateTable::fast_invokevfinal(int byte_no) ++{ ++ __ call_Unimplemented(); ++} ++ ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, x10, xmethod, // get f1 Klass*, f2 Method* ++ x12, x13); // recv, flags ++ ++ // x10: interface klass (from f1) 
++ // xmethod: method (from f2) ++ // x12: receiver ++ // x13: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details ++ Label notObjectMethod; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_forced_virtual_shift); ++ __ beqz(t0, notObjectMethod); ++ ++ invokevirtual_helper(xmethod, x12, x13); ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ andi(t0, x13, 1UL << ConstantPoolCacheEntry::is_vfinal_shift); ++ __ beqz(t0, notVFinal); ++ ++ // Check receiver klass into x13 - also a null check ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); ++ ++ Label subtype; ++ __ check_klass_subtype(x13, x10, x14, subtype); ++ // If we get here the typecheck failed ++ __ j(no_such_interface); ++ __ bind(subtype); ++ ++ __ profile_final_call(x10); ++ __ profile_arguments_type(x10, xmethod, x14, true); ++ __ jump_from_interpreted(xmethod); ++ ++ __ bind(notVFinal); ++ ++ // Get receiver klass into x13 - also a null check ++ __ restore_locals(); ++ __ null_check(x12, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(x13, x12); ++ ++ Label no_such_method; ++ ++ // Preserve method for the throw_AbstractMethodErrorVerbose. ++ __ mv(x28, xmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in x10. Subklass in x13. Blows t1, x30 ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ x13, x10, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ t1, x30, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ // profile this call ++ __ profile_virtual_call(x13, x30, x9); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld(x10, Address(xmethod, Method::const_offset())); ++ __ ld(x10, Address(x10, ConstMethod::constants_offset())); ++ __ ld(x10, Address(x10, ConstantPool::pool_holder_offset_in_bytes())); ++ __ lwu(xmethod, Address(xmethod, Method::itable_index_offset())); ++ __ subw(xmethod, xmethod, Method::itable_index_max); ++ __ negw(xmethod, xmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose ++ __ mv(xlocals, x13); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ xlocals, x10, xmethod, ++ // outputs: method, scan temp. reg ++ xmethod, x30, ++ no_such_interface); ++ ++ // xmethod: methodOop to call ++ // x12: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beqz(xmethod, no_such_method); ++ ++ __ profile_arguments_type(x13, xmethod, x30, true); ++ ++ // do the call ++ // x12: receiver ++ // xmethod,: methodOop ++ __ jump_from_interpreted(xmethod); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows ... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. 
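The subw/negw pair above is easy to misread: the value loaded from the Method* is biased by Method::itable_index_max, and the itable index actually passed to lookup_interface_method is recovered by subtracting and negating. A one-function sketch of what those two instructions compute (describing the arithmetic in the code above, not the Method internals):

#include <cstdint>

static int32_t recover_itable_index(int32_t stored_value, int32_t itable_index_max) {
  int32_t biased = stored_value - itable_index_max;   // subw xmethod, xmethod, itable_index_max
  return -biased;                                     // negw xmethod, xmethod  ==  itable_index_max - stored_value
}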
++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), x13, x28); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exceptiong ++ __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), x13, x10); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ return; ++} ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, xmethod, x10, x12); ++ __ verify_method_ptr(x12); ++ __ verify_oop(x12); ++ __ null_check(x12); ++ ++ // FIXME: profile the LambdaForm also ++ ++ // x30 is safe to use here as a temp reg because it is about to ++ // be clobbered by jump_from_interpreted(). ++ __ profile_final_call(x30); ++ __ profile_arguments_type(x30, xmethod, x14, true); ++ ++ __ jump_from_interpreted(xmethod); ++} ++ ++void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ prepare_invoke(byte_no, xmethod, x10); ++ ++ // x10: CallSite object (from cpool->resolved_references[]) ++ // xmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: x10_callsite is already pushed by prepare_invoke ++ ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(xbcp); ++ __ profile_arguments_type(x13, xmethod, x30, false); ++ ++ __ verify_oop(x10); ++ ++ __ jump_from_interpreted(xmethod); ++} ++ ++//----------------------------------------------------------------------------- ++// Allocation ++ ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ ++ __ get_unsigned_2_byte_index_at_bcp(x13, 1); ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ ++ __ get_cpool_and_tags(x14, x10); ++ // Make sure the class we're about to instantiate has been resolved. ++ // This is done before loading InstanceKlass to be consistent with the order ++ // how Constant Pool is update (see ConstantPool::klass_at_put) ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ add(t0, x10, x13); ++ __ la(t0, Address(t0, tags_offset)); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(t0, t0); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t1, t0, (u1)JVM_CONSTANT_Class); ++ __ bnez(t1, slow_case); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_offset(x14, x13, x14, t0); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lbu(t0, Address(x14, InstanceKlass::init_state_offset())); ++ __ sub(t1, t0, (u1)InstanceKlass::fully_initialized); ++ __ bnez(t1, slow_case); ++ ++ // get instance_size in InstanceKlass (scaled to a count of bytes) ++ __ lwu(x13, Address(x14, Klass::layout_helper_offset())); ++ // test to see if it has a finalizer or is malformed in some way ++ __ andi(t0, x13, Klass::_lh_instance_slow_path_bit); ++ __ bnez(t0, slow_case); ++ ++ // Allocate the instance: ++ // If TLAB is enabled: ++ // Try to allocate in the TLAB. 
++ // If fails, go to the slow path. ++ // Else If inline contiguous allocations are enabled: ++ // Try to allocate in eden. ++ // If fails due to heap end, go to slow path ++ // ++ // If TLAB is enabled OR inline contiguous is enabled: ++ // Initialize the allocation. ++ // Exit. ++ // Go to slow path. ++ const bool allow_shared_alloc = Universe::heap()->supports_inline_contig_alloc(); ++ ++ if (UseTLAB) { ++ __ tlab_allocate(x10, x13, 0, noreg, x11, slow_case); ++ ++ if (ZeroTLAB) { ++ // the fields have already been cleared ++ __ j(initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ j(initialize_object); ++ } ++ } else { ++ // Allocation in the shared Eden, if allowed. ++ // ++ // x13: instance size in bytes ++ if (allow_shared_alloc) { ++ __ eden_allocate(x10, x13, 0, x28, slow_case); ++ } ++ } ++ ++ // If UseTLAB or allow_shared_alloc is true, the object has been created above and ++ // still needs to be initialized. Otherwise, skip and go to the slow path. ++ if (UseTLAB || allow_shared_alloc) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ bind(initialize_object); ++ __ sub(x13, x13, sizeof(oopDesc)); ++ __ beqz(x13, initialize_header); ++ ++ // Initialize object fields ++ { ++ __ add(x12, x10, sizeof(oopDesc)); ++ Label loop; ++ __ bind(loop); ++ __ sd(zr, Address(x12)); ++ __ add(x12, x12, BytesPerLong); ++ __ sub(x13, x13, BytesPerLong); ++ __ bnez(x13, loop); ++ } ++ ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(t0, Address(x14, Klass::prototype_header_offset())); ++ } else { ++ __ mv(t0, (intptr_t)markOopDesc::prototype()); ++ } ++ __ sd(t0, Address(x10, oopDesc::mark_offset_in_bytes())); ++ __ store_klass_gap(x10, zr); // zero klass gap for compressed oops ++ __ store_klass(x10, x14); // store klass last ++ ++ { ++ SkipIfEqual skip(_masm, &DTraceAllocProbes, false); ++ // Trigger dtrace event for fastpath ++ __ push(atos); // save the return value ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), x10); ++ __ pop(atos); // restore the return value ++ } ++ __ j(done); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(c_rarg1); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); ++ __ verify_oop(x10); ++ ++ // continue ++ __ bind(done); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ load_unsigned_byte(c_rarg1, at_bcp(1)); ++ __ mv(c_rarg2, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), ++ c_rarg1, c_rarg2); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object. ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); ++ __ get_constant_pool(c_rarg1); ++ __ mv(c_rarg3, x10); ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), ++ c_rarg1, c_rarg2, c_rarg3); ++ // Must prevent reordering of stores for object initialization with stores that publish the new object.
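The fast path taken by tlab_allocate above is a bump-pointer allocation in the thread-local allocation buffer; only if the object does not fit does execution fall through to slow_case and InterpreterRuntime::_new. A minimal sketch of the bump-pointer idea (field names are illustrative, not the real ThreadLocalAllocBuffer API):

#include <cstddef>
#include <cstdint>

struct TlabSketch {
  uint8_t* top;   // next free byte in the thread-local buffer
  uint8_t* end;   // one past the last usable byte
};

// Succeed only if the whole object fits between top and end; otherwise return
// nullptr so the caller can branch to the slow path. Fields still need zeroing
// afterwards (unless the TLAB itself is pre-zeroed, cf. ZeroTLAB above).
static void* tlab_try_allocate(TlabSketch& tlab, size_t size_in_bytes) {
  if (size_in_bytes > (size_t)(tlab.end - tlab.top)) {
    return nullptr;
  }
  void* obj = tlab.top;
  tlab.top += size_in_bytes;
  return obj;
}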
++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(x10, arrayOopDesc::length_offset_in_bytes()); ++ __ lwu(x10, Address(x10, arrayOopDesc::length_offset_in_bytes())); ++} ++ ++void TemplateTable::checkcast() ++{ ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); ++ ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ j(resolved); ++ ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ mv(x13, x10); // Save object in x13; x10 needed for subtype check ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); // x10 = klass ++ ++ __ bind(resolved); ++ __ load_klass(x9, x13); ++ ++ // Generate subtype check. Blows x12, x15. Object in x13. ++ // Superklass in x10. Subklass in x9. ++ __ gen_subtype_check(x9, ok_is_subtype); ++ ++ // Come here on failure ++ __ push_reg(x13); ++ // object is at TOS ++ __ j(Interpreter::_throw_ClassCastException_entry); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, x13); // Restore object in x13 ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++} ++ ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beqz(x10, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(x12, x13); // x12=cpool, x13=tags array ++ __ get_unsigned_2_byte_index_at_bcp(x9, 1); // x9=index ++ // See if bytecode has already been quicked ++ __ add(t0, x13, Array::base_offset_in_bytes()); ++ __ add(x11, t0, x9); ++ __ membar(MacroAssembler::AnyAny); ++ __ lbu(x11, x11); ++ __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); ++ __ sub(t0, x11, (u1)JVM_CONSTANT_Class); ++ __ beqz(t0, quicked); ++ ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(x10, xthread); ++ __ pop_reg(x13); // restore receiver ++ __ verify_oop(x13); ++ __ load_klass(x13, x13); ++ __ j(resolved); ++ ++ // Get superklass in x10 and subklass in x13 ++ __ bind(quicked); ++ __ load_klass(x13, x10); ++ __ load_resolved_klass_at_offset(x12, x9, x10, t0); ++ ++ __ bind(resolved); ++ ++ // Generate subtype check. Blows x12, x15 ++ // Superklass in x10. Subklass in x13. ++ __ gen_subtype_check(x13, ok_is_subtype); ++ ++ // Come here on failure ++ __ mv(x10, zr); ++ __ j(done); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ mv(x10, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. 
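checkcast and instanceof both funnel into gen_subtype_check with the super klass in x10 and the sub klass in x9/x13. Conceptually the test asks whether one klass appears among the other's supertypes; the sketch below is a deliberately simplified linear superclass walk that only conveys the semantics (the real HotSpot check is near-constant time via super_check_offset and a secondary-supers cache, and interfaces are handled through the secondary-supers list rather than this chain):

struct KlassNode {
  const KlassNode* super;   // nullptr for java.lang.Object
};

static bool is_subtype_of(const KlassNode* sub, const KlassNode* super) {
  for (const KlassNode* k = sub; k != nullptr; k = k->super) {
    if (k == super) {
      return true;          // checkcast keeps the object; instanceof pushes 1
    }
  }
  return false;             // checkcast throws ClassCastException; instanceof pushes 0
}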
++ if (ProfileInterpreter) { ++ __ j(done); ++ __ bind(is_null); ++ __ profile_null_seen(x12); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // x10 = 0: obj == NULL or obj is not an instanceof the specified klass ++ // x10 = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++//----------------------------------------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug inists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ // get the unpatched byte code ++ __ get_method(c_rarg1); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ c_rarg1, xbcp); ++ __ mv(x9, x10); ++ ++ // post the breakpoint event ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ++ xmethod, xbcp); ++ ++ // complete the execution of original bytecode ++ __ mv(t0, x9); ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ __ null_check(x10); ++ __ j(Interpreter::throw_exception_entry()); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- esp = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [saved fp ] <--- fp ++void TemplateTable::monitorenter() ++{ ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(x10); ++ ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ Label allocated; ++ ++ // initialize entry pointer ++ __ mv(c_rarg1, zr); // points to free slot or NULL ++ ++ // find a free slot in the monitor block (result in c_rarg1) ++ { ++ Label entry, loop, exit, notUsed; ++ __ ld(c_rarg3, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ ++ __ j(entry); ++ ++ __ bind(loop); ++ // check if current entry is used ++ // if not used then remember entry in c_rarg1 ++ __ ld(t0, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); ++ __ bnez(t0, notUsed); ++ __ mv(c_rarg1, c_rarg3); ++ __ bind(notUsed); ++ // check if current entry is for same object ++ // if same object then stop searching ++ __ beq(x10, t0, exit); ++ // otherwise advance to next entry ++ __ add(c_rarg3, c_rarg3, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg3, c_rarg2, loop); ++ __ bind(exit); ++ } ++ ++ __ bnez(c_rarg1, allocated); // check if a slot has been found and ++ // if found, continue with that on ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. 
compute new pointers // esp: old expression stack top ++ __ ld(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom ++ __ sub(esp, esp, entry_size); // move expression stack top ++ __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom ++ __ mv(c_rarg3, esp); // set start value for copy loop ++ __ sd(c_rarg1, monitor_block_bot); // set new monitor block bottom ++ __ sub(sp, sp, entry_size); // make room for the monitor ++ ++ __ j(entry); ++ // 2. move expression stack contents ++ __ bind(loop); ++ __ ld(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack ++ // word from old location ++ __ sd(c_rarg2, Address(c_rarg3, 0)); // and store it at new location ++ __ add(c_rarg3, c_rarg3, wordSize); // advance to next word ++ __ bind(entry); ++ __ bne(c_rarg3, c_rarg1, loop); // check if bottom reached.if not at bottom ++ // then copy next word ++ } ++ ++ // call run-time routine ++ // c_rarg1: points to monitor entry ++ __ bind(allocated); ++ ++ // Increment bcp to point to the next bytecode, so exception ++ // handling for async. exceptions work correctly. ++ // The object has already been poped from the stack, so the ++ // expression stack looks correct. ++ __ addi(xbcp, xbcp, 1); ++ ++ // store object ++ __ sd(x10, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ __ lock_object(c_rarg1); ++ ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ ++ // The bcp has already been incremented. Just need to dispatch to ++ // next instruction. ++ __ dispatch_next(vtos); ++} ++ ++void TemplateTable::monitorexit() ++{ ++ transition(atos, vtos); ++ ++ // check for NULL object ++ __ null_check(x10); ++ ++ const Address monitor_block_top( ++ fp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ fp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg1, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ la(c_rarg2, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ j(entry); ++ ++ __ bind(loop); ++ // check if current entry is for same object ++ __ ld(t0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ beq(x10, t0, found); ++ // otherwise advance to next entry ++ __ add(c_rarg1, c_rarg1, entry_size); ++ __ bind(entry); ++ // check if bottom reached ++ // if not at bottom then check this entry ++ __ bne(c_rarg1, c_rarg2, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ __ bind(found); ++ __ push_ptr(x10); // make sure object is on stack (contract with oopMaps) ++ __ unlock_object(c_rarg1); ++ __ pop_ptr(x10); // discard object ++} ++ ++// Wide instructions ++void TemplateTable::wide() ++{ ++ __ load_unsigned_byte(x9, at_bcp(1)); ++ __ mv(t0, (address)Interpreter::_wentry_point); ++ __ shadd(t0, x9, t0, t1, 3); ++ __ ld(t0, Address(t0)); ++ __ jr(t0); ++} ++ ++// Multi arrays ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ __ load_unsigned_byte(x10, at_bcp(3)); // get number of dimensions ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ shadd(c_rarg1, x10, esp, c_rarg1, 3); ++ __ sub(c_rarg1, c_rarg1, wordSize); ++ call_VM(x10, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), ++ c_rarg1); ++ __ load_unsigned_byte(x11, at_bcp(3)); ++ __ shadd(esp, x11, esp, t0, 3); ++} +diff --git a/src/hotspot/cpu/riscv/templateTable_riscv.hpp b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +new file mode 100644 +index 000000000..b437c8f4c +--- /dev/null ++++ b/src/hotspot/cpu/riscv/templateTable_riscv.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++#define CPU_RISCV_TEMPLATETABLE_RISCV_HPP ++ ++static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ ++// Helpers ++static void index_check(Register array, Register index); ++ ++#endif // CPU_RISCV_TEMPLATETABLE_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmStructs_riscv.hpp b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +new file mode 100644 +index 000000000..03079aec0 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmStructs_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_RISCV_VMSTRUCTS_RISCV_HPP ++#define CPU_RISCV_VMSTRUCTS_RISCV_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // CPU_RISCV_VMSTRUCTS_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +new file mode 100644 +index 000000000..dd4f5c9ae +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.cpp +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_riscv.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ int core_id = -1; ++ int chip_id = -1; ++ int len = 0; ++ char* src_string = NULL; ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "RISCV64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "RISCV64 %s", _features_string); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +new file mode 100644 +index 000000000..0982b6668 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_ext_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_EXT_RISCV_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++ ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_EXT_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +new file mode 100644 +index 000000000..31d5bb5f4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp +@@ -0,0 +1,190 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/java.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++#include OS_HEADER_INLINE(os) ++ ++const char* VM_Version::_uarch = ""; ++uint32_t VM_Version::_initial_vector_length = 0; ++ ++void VM_Version::initialize() { ++ get_os_cpu_info(); ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); ++ } ++ ++ if (UseAES || UseAESIntrinsics) { ++ if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++ if (UseAESCTRIntrinsics) { ++ warning("AES/CTR intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics) { ++ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ } ++ ++ if (UseSHA256Intrinsics) { ++ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ } ++ ++ if (UseSHA512Intrinsics) { ++ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseCRC32Intrinsics) { ++ warning("CRC32Intrinsics instructions are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU."); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ ++ if (UseRVV) { ++ if (!(_features & CPU_V)) { ++ warning("RVV is not supported on this CPU"); ++ FLAG_SET_DEFAULT(UseRVV, false); ++ } else { ++ // read vector length from vector CSR vlenb ++ _initial_vector_length = get_current_vector_length(); ++ } ++ } ++ ++ if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++ FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++ } ++ ++ if (UseZbb) { ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, true); ++ } ++ } else { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, false); ++ } ++ ++ char buf[512]; ++ buf[0] = '\0'; ++ if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch); ++ strcat(buf, "rv64"); ++#define ADD_FEATURE_IF_SUPPORTED(id, name, bit) if (_features & CPU_##id) strcat(buf, name); ++ CPU_FEATURE_FLAGS(ADD_FEATURE_IF_SUPPORTED) ++#undef ADD_FEATURE_IF_SUPPORTED ++ ++ _features_string = os::strdup(buf); ++ ++#ifdef COMPILER2 ++ initialize_c2(); ++#endif // COMPILER2 ++} ++ ++#ifdef COMPILER2 ++void VM_Version::initialize_c2() { ++ // lack of cmove in riscv ++ if (UseCMoveUnconditionally) { ++ FLAG_SET_DEFAULT(UseCMoveUnconditionally, false); ++ } ++ if (ConditionalMoveLimit > 0) { ++ FLAG_SET_DEFAULT(ConditionalMoveLimit, 0); ++ } ++ ++ if (!UseRVV) { ++ FLAG_SET_DEFAULT(SpecialEncodeISOArray, false); ++ } ++ ++ if (!UseRVV && MaxVectorSize) { ++ FLAG_SET_DEFAULT(MaxVectorSize, 0); ++ } ++ ++ if (UseRVV) { ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { 
++ MaxVectorSize = _initial_vector_length; ++ } else if (MaxVectorSize < 16) { ++ warning("RVV does not support vector length less than 16 bytes. Disabling RVV."); ++ UseRVV = false; ++ } else if (is_power_of_2(MaxVectorSize)) { ++ if (MaxVectorSize > _initial_vector_length) { ++ warning("Current system only supports max RVV vector length %d. Set MaxVectorSize to %d", ++ _initial_vector_length, _initial_vector_length); ++ } ++ MaxVectorSize = _initial_vector_length; ++ } else { ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); ++ } ++ } ++ ++ // disable prefetch ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); ++ } ++ ++ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { ++ FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, true); ++ } ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.hpp b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +new file mode 100644 +index 000000000..0178e6d75 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vm_version_riscv.hpp +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VERSION_RISCV_HPP ++#define CPU_RISCV_VM_VERSION_RISCV_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version : public Abstract_VM_Version { ++public: ++ // Initialization ++ static void initialize(); ++ ++ enum Feature_Flag { ++#define CPU_FEATURE_FLAGS(decl) \ ++ decl(I, "i", 8) \ ++ decl(M, "m", 12) \ ++ decl(A, "a", 0) \ ++ decl(F, "f", 5) \ ++ decl(D, "d", 3) \ ++ decl(C, "c", 2) \ ++ decl(V, "v", 21) ++ ++#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1 << bit), ++ CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG) ++#undef DECLARE_CPU_FEATURE_FLAG ++ }; ++ ++protected: ++ static const char* _uarch; ++ static uint32_t _initial_vector_length; ++ static void get_os_cpu_info(); ++ static uint32_t get_current_vector_length(); ++ ++#ifdef COMPILER2 ++private: ++ static void initialize_c2(); ++#endif // COMPILER2 ++}; ++ ++#endif // CPU_RISCV_VM_VERSION_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.cpp b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +new file mode 100644 +index 000000000..6572d9334 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.cpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i = 0; ++ for ( ; i < ConcreteRegisterImpl::max_gpr ; ) { ++ for (int j = 0; j < RegisterImpl::max_slots_per_register; j++) { ++ regName[i++] = reg->name(); ++ } ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ for (int j = 0; j < FloatRegisterImpl::max_slots_per_register; j++) { ++ regName[i++] = freg->name(); ++ } ++ freg = freg->successor(); ++ } ++ ++ VectorRegister vreg = ::as_VectorRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_vpr ; ) { ++ for (int j = 0; j < VectorRegisterImpl::max_slots_per_register; j++) { ++ regName[i++] = vreg->name(); ++ } ++ vreg = vreg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers; i++) { ++ regName[i] = "NON-GPR-FPR-VPR"; ++ } ++} +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +new file mode 100644 +index 000000000..ec76a1db1 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.hpp +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VMREG_RISCV_HPP ++#define CPU_RISCV_VMREG_RISCV_HPP ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_VectorRegister() { ++ return value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_vpr; ++} ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); ++} ++ ++inline VectorRegister as_VectorRegister() { ++ assert( is_VectorRegister() && ((value() & (VectorRegisterImpl::max_slots_per_register - 1)) == 0), "must be" ); ++ return ::as_VectorRegister((value() - ConcreteRegisterImpl::max_fpr) / ++ VectorRegisterImpl::max_slots_per_register); ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ return is_even(value()); ++} ++ ++#endif // CPU_RISCV_VMREG_RISCV_HPP +diff --git a/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +new file mode 100644 +index 000000000..9605e59f4 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vmreg_riscv.inline.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++#define CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this == noreg ) { ++ return VMRegImpl::Bad(); ++ } ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++inline VMReg VectorRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() * VectorRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_fpr); ++} ++ ++#endif // CPU_RISCV_VM_VMREG_RISCV_INLINE_HPP +diff --git a/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +new file mode 100644 +index 000000000..b2aa87ab8 +--- /dev/null ++++ b/src/hotspot/cpu/riscv/vtableStubs_riscv.cpp +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "assembler_riscv.inline.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_riscv.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/instanceKlass.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_riscv.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(t2, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ increment(Address(t2)); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ __ load_klass(t2, j_rarg0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ start_pc = __ pc(); ++ ++ // check offset vs vtable length ++ __ lwu(t0, Address(t2, Klass::vtable_length_offset())); ++ __ mvw(t1, vtable_index * vtableEntry::size()); ++ __ bgt(t0, t1, L); ++ __ enter(); ++ __ mv(x12, vtable_index); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, x12); ++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ ++ __ leave(); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ start_pc = __ pc(); ++ __ lookup_virtual_method(t2, vtable_index, xmethod); ++ // lookup_virtual_method generates ++ // 4 instructions (maximum value encountered in normal case):li(lui + addiw) + add + ld ++ // 1 instruction (best case):ld * 1 ++ slop_delta = 16 - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beqz(xmethod, L); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // x10: receiver klass ++ // xmethod: Method* ++ // x12: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. 
++ address start_pc = NULL; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ assert_cond(masm != NULL); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ __ increment(Address(x18)); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ ++ // Entry arguments: ++ // t2: CompiledICHolder ++ // j_rarg0: Receiver ++ ++ // This stub is called from compiled code which has no callee-saved registers, ++ // so all registers except arguments are free at this point. ++ const Register recv_klass_reg = x18; ++ const Register holder_klass_reg = x19; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = xmethod; // resolved interface klass (REFC) ++ const Register temp_reg = x28; ++ const Register temp_reg2 = x29; ++ const Register icholder_reg = t1; ++ ++ Label L_no_such_interface; ++ ++ __ ld(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); ++ __ ld(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ ++ start_pc = __ pc(); ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(recv_klass_reg, j_rarg0); ++ ++ // Receiver subtype check against REFC. ++ __ lookup_interface_method(// inputs: rec. class, interface ++ recv_klass_reg, resolved_klass_reg, noreg, ++ // outputs: scan temp. reg1, scan temp. reg2 ++ temp_reg2, temp_reg, ++ L_no_such_interface, ++ /*return_method=*/false); ++ ++ const ptrdiff_t typecheckSize = __ pc() - start_pc; ++ start_pc = __ pc(); ++ ++ // Get selected method from declaring class and itable index ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ recv_klass_reg, holder_klass_reg, itable_index, ++ // outputs: method, scan temp. reg ++ xmethod, temp_reg, ++ L_no_such_interface); ++ ++ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ ++ // Reduce "estimate" such that "padding" does not drop below 8. ++ const ptrdiff_t estimate = 256; ++ const ptrdiff_t codesize = typecheckSize + lookupSize; ++ slop_delta = (int)(estimate - codesize); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L2; ++ __ beqz(xmethod, L2); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ bnez(t0, L2); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L2); ++ } ++#endif // ASSERT ++ ++ // xmethod: Method* ++ // j_rarg0: receiver ++ address ame_addr = __ pc(); ++ __ ld(t0, Address(xmethod, Method::from_compiled_offset())); ++ __ jr(t0); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
++ assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ masm->flush(); ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); ++ ++ return s; ++} ++ ++int VtableStub::pd_code_alignment() { ++ // riscv cache line size is 64 bytes, but we want to limit alignment loss. ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209..3b836fe6b 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -1447,7 +1447,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op + } + + // result = condition ? opr1 : opr2 +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr || cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on s390"); ++ + Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual; + switch (condition) { + case lir_cond_equal: acond = Assembler::bcondEqual; ncond = Assembler::bcondNotEqual; break; +diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad +index e335f473d..53ad912cb 100644 +--- a/src/hotspot/cpu/s390/s390.ad ++++ b/src/hotspot/cpu/s390/s390.ad +@@ -1522,14 +1522,16 @@ const bool Matcher::match_rule_supported(int opcode) { + // BUT: make sure match rule is not disabled by a false predicate! + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + // TODO + // Identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. +- bool ret_value = match_rule_supported(opcode); ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } + // Add rules here. + +- return ret_value; // Per default match rules are supported. ++ return true; // Per default match rules are supported. + } + + int Matcher::regnum_to_fpu_offset(int regnum) { +@@ -1578,6 +1580,14 @@ const uint Matcher::vector_shift_count_ideal_reg(int size) { + return Node::NotAMachineReg; + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // z/Architecture does support misaligned store/load at minimal extra cost. + const bool Matcher::misaligned_vectors_ok() { + return true; +diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad +index 7a2798a51..7d9b17b44 100644 +--- a/src/hotspot/cpu/sparc/sparc.ad ++++ b/src/hotspot/cpu/sparc/sparc.ad +@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. 
+ } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for +@@ -1764,6 +1764,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // SPARC doesn't support misaligned vectors store/load. + const bool Matcher::misaligned_vectors_ok() { + return false; +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4..d38c63600 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1970,7 +1970,10 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + } + } + +-void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1, LIR_Opr cmp_opr2) { ++ assert(cmp_opr1 == LIR_OprFact::illegalOpr && cmp_opr2 == LIR_OprFact::illegalOpr, "unnecessary cmp operands on x86"); ++ + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::equal; ncond = Assembler::notEqual; break; +diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp +index 82fd8522b..8016d328a 100644 +--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp +@@ -6606,6 +6606,99 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register + bind(DONE_LABEL); + } // string_indexof_char + ++void MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, ++ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { ++ ShortBranchVerifier sbv(this); ++ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); ++ ++ int stride = 16; ++ ++ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, ++ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, ++ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, ++ FOUND_SEQ_CHAR, DONE_LABEL; ++ ++ movptr(result, str1); ++ if (UseAVX >= 2) { ++ cmpl(cnt1, stride); ++ jcc(Assembler::less, SCAN_TO_CHAR_INIT); ++ cmpl(cnt1, stride*2); ++ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); ++ movdl(vec1, ch); ++ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); ++ vpxor(vec2, vec2); ++ movl(tmp, cnt1); ++ andl(tmp, 0xFFFFFFE0); //vector count (in chars) ++ andl(cnt1,0x0000001F); //tail count (in chars) ++ ++ bind(SCAN_TO_32_CHAR_LOOP); ++ vmovdqu(vec3, Address(result, 0)); ++ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); ++ vptest(vec2, vec3); ++ jcc(Assembler::carryClear, FOUND_CHAR); ++ addptr(result, 32); ++ subl(tmp, stride*2); ++ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); ++ jmp(SCAN_TO_16_CHAR); ++ ++ bind(SCAN_TO_16_CHAR_INIT); ++ movdl(vec1, ch); ++ pxor(vec2, vec2); ++ pshufb(vec1, vec2); ++ } ++ ++ bind(SCAN_TO_16_CHAR); ++ cmpl(cnt1, stride); ++ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left ++ if (UseAVX < 2) { ++ movdl(vec1, ch); ++ pxor(vec2, vec2); ++ pshufb(vec1, vec2); ++ } ++ movl(tmp, cnt1); ++ andl(tmp, 0xFFFFFFF0); //vector count (in bytes) ++ 
andl(cnt1,0x0000000F); //tail count (in bytes) ++ ++ bind(SCAN_TO_16_CHAR_LOOP); ++ movdqu(vec3, Address(result, 0)); ++ pcmpeqb(vec3, vec1); ++ ptest(vec2, vec3); ++ jcc(Assembler::carryClear, FOUND_CHAR); ++ addptr(result, 16); ++ subl(tmp, stride); ++ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... ++ ++ bind(SCAN_TO_CHAR_INIT); ++ testl(cnt1, cnt1); ++ jcc(Assembler::zero, RET_NOT_FOUND); ++ bind(SCAN_TO_CHAR_LOOP); ++ load_unsigned_byte(tmp, Address(result, 0)); ++ cmpl(ch, tmp); ++ jccb(Assembler::equal, FOUND_SEQ_CHAR); ++ addptr(result, 1); ++ subl(cnt1, 1); ++ jccb(Assembler::zero, RET_NOT_FOUND); ++ jmp(SCAN_TO_CHAR_LOOP); ++ ++ bind(RET_NOT_FOUND); ++ movl(result, -1); ++ jmpb(DONE_LABEL); ++ ++ bind(FOUND_CHAR); ++ if (UseAVX >= 2) { ++ vpmovmskb(tmp, vec3); ++ } else { ++ pmovmskb(tmp, vec3); ++ } ++ bsfl(ch, tmp); ++ addptr(result, ch); ++ ++ bind(FOUND_SEQ_CHAR); ++ subptr(result, str1); ++ ++ bind(DONE_LABEL); ++} // stringL_indexof_char ++ + // helper function for string_compare + void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, + Address::ScaleFactor scale, Address::ScaleFactor scale1, +diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp +index 1bed0cce9..47a062c11 100644 +--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp +@@ -1659,6 +1659,8 @@ public: + #ifdef COMPILER2 + void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, + XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); ++ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, ++ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); + + // IndexOf strings. + // Small strings are loaded through stack if they cross page boundary. +diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad +index baa7cc774..238d8729b 100644 +--- a/src/hotspot/cpu/x86/x86.ad ++++ b/src/hotspot/cpu/x86/x86.ad +@@ -1511,10 +1511,13 @@ const bool Matcher::match_rule_supported(int opcode) { + return ret_value; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + // identify extra cases that we might want to provide match rules for + // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); ++ if (!vector_size_supported(bt, vlen)) { ++ ret_value = false; ++ } + if (ret_value) { + switch (opcode) { + case Op_AbsVB: +@@ -1642,6 +1645,15 @@ const int Matcher::min_vector_size(const BasicType bt) { + return MIN2(size,max_size); + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ ++ + // Vector ideal reg corresponding to specified size in bytes + const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); +diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad +index bc9947327..bbe49bd62 100644 +--- a/src/hotspot/cpu/x86/x86_32.ad ++++ b/src/hotspot/cpu/x86/x86_32.ad +@@ -11909,12 +11909,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2 + ins_pipe( pipe_slow ); + %} + +-instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, ++instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, + eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ +- predicate(UseSSE42Intrinsics); ++ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n) -> encoding() == StrIntrinsicNode::U)); + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); +- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, + $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); +@@ -11922,6 +11922,19 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, + ins_pipe( pipe_slow ); + %} + ++instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, ++ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ ++ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); ++ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} ++ ins_encode %{ ++ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, ++ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ + // fast array equals + instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, + regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) +diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad +index 7e6739ffe..53f887ea6 100644 +--- a/src/hotspot/cpu/x86/x86_64.ad ++++ b/src/hotspot/cpu/x86/x86_64.ad +@@ -2975,7 +2975,7 @@ frame + RAX_H_num // Op_RegL + }; + // Excluded flags and vector registers. 
+- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); ++ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); + return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} + %} +@@ -11509,13 +11509,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI + ins_pipe( pipe_slow ); + %} + +-instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, +- rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) ++instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, ++ rbx_RegI result, legVecS vec1, legVecS vec2, legVecS vec3, rcx_RegI tmp, rFlagsReg cr) + %{ +- predicate(UseSSE42Intrinsics); ++ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); +- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} ++ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} + ins_encode %{ + __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, + $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); +@@ -11523,6 +11523,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, + ins_pipe( pipe_slow ); + %} + ++instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch, ++ rbx_RegI result, legVecS tmp_vec1, legVecS tmp_vec2, legVecS tmp_vec3, rcx_RegI tmp, rFlagsReg cr) ++%{ ++ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); ++ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} ++ ins_encode %{ ++ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, ++ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ + // fast string equals + instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result, + legVecS tmp1, legVecS tmp2, rbx_RegI tmp3, rFlagsReg cr) +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 74945999e..6c79d20a4 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -1903,7 +1903,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { + {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, + {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, + {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, +- {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V"}, ++#ifdef _LP64 ++ {EM_RISCV, EM_RISCV, ELFCLASS64, ELFDATA2LSB, (char*)"RISC-V64"}, ++#else ++ {EM_RISCV, EM_RISCV, ELFCLASS32, ELFDATA2LSB, (char*)"RISC-V32"}, ++#endif + {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LoongArch"}, + }; + +@@ -2735,6 +2739,8 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { + strncpy(cpuinfo, "IA64", length); + #elif defined(PPC) + strncpy(cpuinfo, "PPC64", length); ++#elif defined(RISCV) ++ strncpy(cpuinfo, LP64_ONLY("RISCV64") NOT_LP64("RISCV32"), length); + #elif defined(S390) + strncpy(cpuinfo, "S390", 
      length);
+ #elif defined(SPARC)
+@@ -3966,7 +3972,8 @@ size_t os::Linux::find_large_page_size() {
+       IA64_ONLY(256 * M)
+       PPC_ONLY(4 * M)
+       S390_ONLY(1 * M)
+-      SPARC_ONLY(4 * M);
++      SPARC_ONLY(4 * M)
++      RISCV64_ONLY(2 * M);
+ #endif // ZERO
+
+   FILE *fp = fopen("/proc/meminfo", "r");
+diff --git a/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
+new file mode 100644
+index 000000000..961fff011
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_riscv/atomic_linux_riscv.hpp
+@@ -0,0 +1,113 @@
++/*
++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
++#define OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
++
++#include "vm_version_riscv.hpp"
++
++// Implementation of class atomic
++// Note that memory_order_conservative requires a full barrier after atomic stores.
++// See https://patchwork.kernel.org/patch/3575821/
++
++#define FULL_MEM_BARRIER  __sync_synchronize()
++#define READ_MEM_BARRIER  __atomic_thread_fence(__ATOMIC_ACQUIRE);
++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
++
++template<size_t byte_size>
++struct Atomic::PlatformAdd
++  : public Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
++{
++  template<typename I, typename D>
++  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
++    D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
++    FULL_MEM_BARRIER;
++    return res;
++  }
++};
++
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
++                                                     T volatile* dest,
++                                                     atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T res = __atomic_exchange_n(dest, exchange_value, __ATOMIC_RELEASE);
++  FULL_MEM_BARRIER;
++  return res;
++}
++
++// No direct support for cmpxchg of bytes; emulate using int.
++template<size_t byte_size>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
++                                                        T volatile* dest,
++                                                        T compare_value,
++                                                        atomic_memory_order order) const {
++  STATIC_ASSERT(byte_size == sizeof(T));
++  T value = compare_value;
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++
++  __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */ false,
++                            __ATOMIC_RELAXED, __ATOMIC_RELAXED);
++
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return value;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
++                                                T volatile* dest,
++                                                T compare_value,
++                                                atomic_memory_order order) const {
++  STATIC_ASSERT(4 == sizeof(T));
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  T rv;
++  int tmp;
++  __asm volatile(
++    "1:\n\t"
++    " addiw     %[tmp], %[cv], 0\n\t" // make sure compare_value signed_extend
++    " lr.w.aq   %[rv], (%[dest])\n\t"
++    " bne       %[rv], %[tmp], 2f\n\t"
++    " sc.w.rl   %[tmp], %[ev], (%[dest])\n\t"
++    " bnez      %[tmp], 1b\n\t"
++    "2:\n\t"
++    : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
++    : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
++    : "memory");
++  if (order != memory_order_relaxed) {
++    FULL_MEM_BARRIER;
++  }
++  return rv;
++}
++
++#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP
+diff --git a/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp
+new file mode 100644
+index 000000000..44f04d1a9
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_riscv/bytes_linux_riscv.inline.hpp
+@@ -0,0 +1,44 @@
++/*
++ * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++#ifndef OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP
++#define OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP
++
++#include <byteswap.h>
++
++// Efficient swapping of data bytes from Java byte
++// ordering to native byte ordering and vice versa.
++inline u2 Bytes::swap_u2(u2 x) { ++ return bswap_16(x); ++} ++ ++inline u4 Bytes::swap_u4(u4 x) { ++ return bswap_32(x); ++} ++ ++inline u8 Bytes::swap_u8(u8 x) { ++ return bswap_64(x); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_BYTES_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +new file mode 100644 +index 000000000..645b40a7c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/copy_linux_riscv.inline.hpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static inline void pd_disjoint_words_helper(const HeapWord* from, HeapWord* to, size_t count, bool is_atomic) { ++ switch (count) { ++ case 8: to[7] = from[7]; // fall through ++ case 7: to[6] = from[6]; // fall through ++ case 6: to[5] = from[5]; // fall through ++ case 5: to[4] = from[4]; // fall through ++ case 4: to[3] = from[3]; // fall through ++ case 3: to[2] = from[2]; // fall through ++ case 2: to[1] = from[1]; // fall through ++ case 1: to[0] = from[0]; // fall through ++ case 0: break; ++ default: ++ if(is_atomic) { ++ while (count-- > 0) { *to++ = *from++; } ++ } else { ++ memcpy(to, from, count * HeapWordSize); ++ } ++ } ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words_helper(from, to, count, false); ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words_helper(from, to, count, true); ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ 
_Copy_conjoint_jshorts_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ _Copy_conjoint_jints_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ _Copy_conjoint_jlongs_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size."); ++ _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_bytes(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jshorts(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jints(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ _Copy_arrayof_conjoint_jlongs(from, to, count); ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_COPY_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +new file mode 100644 +index 000000000..041cdf4ff +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/globals_linux_riscv.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
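Before the platform flag defaults that follow, note how pd_disjoint_words_helper earlier in this hunk dispatches: counts up to eight words are copied through a fall-through switch, the larger atomic case falls back to a word-at-a-time loop so each store stays a single aligned store, and the non-atomic case simply calls memcpy. A self-contained approximation of that dispatch, with intptr_t standing in for HeapWord and a made-up function name:

// Sketch of the same small-count dispatch; intptr_t stands in for HeapWord.
#include <cstdint>
#include <cstdio>
#include <cstring>

static void copy_words_sketch(const intptr_t* from, intptr_t* to, size_t count, bool is_atomic) {
  switch (count) {
  case 8: to[7] = from[7]; // fall through
  case 7: to[6] = from[6]; // fall through
  case 6: to[5] = from[5]; // fall through
  case 5: to[4] = from[4]; // fall through
  case 4: to[3] = from[3]; // fall through
  case 3: to[2] = from[2]; // fall through
  case 2: to[1] = from[1]; // fall through
  case 1: to[0] = from[0]; // fall through
  case 0: break;
  default:
    if (is_atomic) {
      while (count-- > 0) { *to++ = *from++; }          // one aligned store per word
    } else {
      std::memcpy(to, from, count * sizeof(intptr_t));  // disjoint ranges, so memcpy is fine
    }
  }
}

int main() {
  intptr_t src[4] = {1, 2, 3, 4}, dst[4] = {0, 0, 0, 0};
  copy_words_sketch(src, dst, 4, false);
  std::printf("%ld %ld %ld %ld\n", (long)dst[0], (long)dst[1], (long)dst[2], (long)dst[3]);
  return 0;
}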
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx, JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx, HeapBaseMinAddress, 2 * G); ++ ++#endif // OS_CPU_LINUX_RISCV_VM_GLOBALS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +new file mode 100644 +index 000000000..842aa51e0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/orderAccess_linux_riscv.hpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP ++ ++// Included in orderAccess.hpp header file. ++ ++#include "vm_version_riscv.hpp" ++ ++// Implementation of class OrderAccess. ++ ++inline void OrderAccess::loadload() { acquire(); } ++inline void OrderAccess::storestore() { release(); } ++inline void OrderAccess::loadstore() { acquire(); } ++inline void OrderAccess::storeload() { fence(); } ++ ++inline void OrderAccess::acquire() { ++ READ_MEM_BARRIER; ++} ++ ++inline void OrderAccess::release() { ++ WRITE_MEM_BARRIER; ++} ++ ++inline void OrderAccess::fence() { ++ FULL_MEM_BARRIER; ++} ++ ++template ++struct OrderAccess::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } ++}; ++ ++#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +new file mode 100644 +index 000000000..37947701b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +@@ -0,0 +1,628 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. 
All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeCache.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "jvm.h" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_LR 1 ++#define REG_FP 8 ++ ++NOINLINE address os::current_stack_pointer() { ++ return (address)__builtin_frame_address(0); ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__gregs[REG_PC]; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__gregs[REG_PC] = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
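The accessors above read pc, sp and fp straight out of uc_mcontext.__gregs, using the file-local REG_LR/REG_FP indices plus the system-provided REG_PC/REG_SP. A minimal standalone sketch of the same technique — a SIGSEGV handler that reports the faulting pc — assuming the linux-riscv64 glibc layout where __gregs[0] is the program counter (this will not compile on other architectures):

// Sketch only: assumes linux-riscv64 glibc, where uc_mcontext.__gregs[0] (REG_PC)
// holds the program counter, the same slot os::Linux::ucontext_get_pc() reads.
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#include <unistd.h>

static void segv_handler(int sig, siginfo_t* info, void* ucVoid) {
  (void)sig;
  ucontext_t* uc = (ucontext_t*)ucVoid;
  unsigned long pc = (unsigned long)uc->uc_mcontext.__gregs[0];
  // fprintf is not async-signal-safe; acceptable only for a throwaway demo.
  fprintf(stderr, "SIGSEGV at %p, pc=0x%lx\n", info->si_addr, pc);
  _exit(1);  // never return into the faulting instruction
}

int main() {
  struct sigaction sa;
  sa.sa_sigaction = segv_handler;
  sa.sa_flags = SA_SIGINFO;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGSEGV, &sa, NULL);
  *(volatile int*)0 = 42;  // deliberately fault
  return 0;
}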
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, ++ intptr_t** ret_sp, ++ intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp != NULL) { ++ *ret_sp = os::Linux::ucontext_get_sp(uc); ++ } ++ if (ret_fp != NULL) { ++ *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp != NULL) { ++ *ret_sp = (intptr_t *)NULL; ++ } ++ if (ret_fp != NULL) { ++ *ret_fp = (intptr_t *)NULL; ++ } ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* frame_sp = NULL; ++ intptr_t* frame_fp = NULL; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &frame_sp, &frame_fp); ++ return frame(frame_sp, frame_fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before RA ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* frame_fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* frame_sp = os::Linux::ucontext_get_sp(uc); ++ address frame_pc = (address)(uintptr_t)(uc->uc_mcontext.__gregs[REG_LR] - ++ NativeInstruction::instruction_size); ++ *fr = frame(frame_sp, frame_fp, frame_pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always saves frame pointer rfp on this stack. This ++// may get turned off by -fomit-frame-pointer. 
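os::current_frame() just below builds its first frame from __builtin_frame_address(0), which is why the comment above cares about -fomit-frame-pointer: without saved frame pointers the native chain is not walkable. A tiny, HotSpot-free illustration of the builtins involved (best compiled with -fno-omit-frame-pointer; nothing here is specific to this patch):

// Prints the current frame address, the caller's frame address and the return
// address: the raw ingredients a native frame walk starts from.
#include <cstdio>

__attribute__((noinline)) static void show_frames() {
  void* fp0 = __builtin_frame_address(0);   // this frame
  void* fp1 = __builtin_frame_address(1);   // caller's frame; needs an intact fp chain
  void* ra  = __builtin_return_address(0);
  std::printf("fp(0)=%p  fp(1)=%p  ra=%p\n", fp0, fp1, ra);
}

int main() {
  show_frames();
  return 0;
}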
++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++NOINLINE frame os::current_frame() { ++ intptr_t **sender_sp = (intptr_t **)__builtin_frame_address(0); ++ if(sender_sp != NULL) { ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ sender_sp[frame::link_offset], ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++ } else { ++ ShouldNotReachHere(); ++ return frame(); ++ } ++} ++ ++// Utility functions ++extern "C" JNIEXPORT int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ) { ++ if(t->is_Java_thread()) { ++ thread = (JavaThread*)t; ++ } else if(t->is_VM_thread()) { ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ ++ address pc = NULL; ++ ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ 
thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++ ++ // Handle signal from NativeJump::patch_verified_entry(). ++ if ((sig == SIGILL || sig == SIGTRAP) ++ && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (TraceTraps) { ++ tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); ++ } ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE && ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ stub = ++ SharedRuntime:: ++ continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime:: ++ IMPLICIT_DIVIDE_BY_ZERO); ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++ address addr_slow = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr_slow != (address)-1) { ++ stub = addr_slow; ++ } ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++ // save all thread context in case we need to restore it ++ if (thread != NULL) { ++ thread->set_saved_exception_pc(pc); ++ } ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 72 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 72 * K; ++ ++// return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
4 * M : 1 * M); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++ ++static const char* reg_abi_names[] = { ++ "pc", ++ "x1(ra)", "x2(sp)", "x3(gp)", "x4(tp)", ++ "x5(t0)", "x6(t1)", "x7(t2)", ++ "x8(s0)", "x9(s1)", ++ "x10(a0)", "x11(a1)", "x12(a2)", "x13(a3)", "x14(a4)", "x15(a5)", "x16(a6)", "x17(a7)", ++ "x18(s2)", "x19(s3)", "x20(s4)", "x21(s5)", "x22(s6)", "x23(s7)", "x24(s8)", "x25(s9)", "x26(s10)", "x27(s11)", ++ "x28(t3)", "x29(t4)","x30(t5)", "x31(t6)" ++}; ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; ++ } ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ for (int r = 0; r < 32; r++) { ++ st->print("%-*.*s=", 8, 8, reg_abi_names[r]); ++ print_location(st, uc->uc_mcontext.__gregs[r]); ++ } ++ st->cr(); ++ ++ intptr_t *frame_sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(frame_sp)); ++ print_hex_dump(st, (address)frame_sp, (address)(frame_sp + 64), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ print_instructions(st, pc, sizeof(char)); ++ st->cr(); ++} ++ ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) { ++ return; ++ } ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ ++ // this is horrendously verbose but the layout of the registers in the ++ // context does not match how we defined our abstract Register set, so ++ // we can't just iterate through the gregs area ++ ++ // this is only for the "general purpose" registers ++ ++ for (int r = 0; r < 32; r++) ++ st->print_cr("%-*.*s=" INTPTR_FORMAT, 8, 8, reg_abi_names[r], (uintptr_t)uc->uc_mcontext.__gregs[r]); ++ st->cr(); ++} ++ ++void os::setup_fpu() { ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ return 0; ++} ++ ++extern "C" { ++ int SpinPause() { ++ return 0; ++ } ++ ++ void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ if (from > to) { ++ const jshort *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jshort *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } ++ } ++ void _Copy_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ if (from > to) { ++ const jint *end = from + count; ++ while (from < end) { ++ *(to++) = *(from++); ++ } ++ } else if (from < to) { ++ const jint *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ *(to--) = *(from--); ++ } ++ } ++ } ++ void _Copy_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ if (from > to) { ++ const jlong *end = from + count; ++ while (from < end) { ++ os::atomic_copy64(from++, to++); ++ } ++ } else if (from < to) { ++ const jlong *end = from; ++ from += count - 1; ++ to += count - 1; ++ while (from >= end) { ++ os::atomic_copy64(from--, to--); ++ } ++ } ++ } ++ ++ void 
_Copy_arrayof_conjoint_bytes(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count); ++ } ++ void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 2); ++ } ++ void _Copy_arrayof_conjoint_jints(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 4); ++ } ++ void _Copy_arrayof_conjoint_jlongs(const HeapWord* from, ++ HeapWord* to, ++ size_t count) { ++ memmove(to, from, count * 8); ++ } ++}; +diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +new file mode 100644 +index 000000000..eae1635b0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.hpp +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP ++ ++ static void setup_fpu(); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ // Atomically copy 64 bits of data ++ static void atomic_copy64(const volatile void *src, volatile void *dst) { ++ *(jlong *) dst = *(const jlong *) src; ++ } ++ ++#endif // OS_CPU_LINUX_RISCV_VM_OS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +new file mode 100644 +index 000000000..82b9bb6fd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/prefetch_linux_riscv.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++#define OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP ++ ++#include "runtime/prefetch.hpp" ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++} ++ ++#endif // OS_CPU_LINUX_RISCV_VM_PREFETCH_LINUX_RISCV_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +new file mode 100644 +index 000000000..c78096931 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. 
It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp = NULL; ++ intptr_t* ret_sp = NULL; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif /* COMPILER2 */ ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +new file mode 100644 +index 000000000..657b98984 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/thread_linux_riscv.hpp +@@ -0,0 +1,67 @@ ++/* ++ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP ++ ++ private: ++ void pd_initialize() { ++ _anchor.clear(); ++ } ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... 
++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* java_fp) { _anchor.set_last_Java_fp(java_fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_RISCV_THREAD_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +new file mode 100644 +index 000000000..8ee443b5d +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vmStructs_linux_riscv.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++#define OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. 
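The VM_STRUCTS_OS_CPU / VM_TYPES_OS_CPU definitions that follow are X-macro lists: they only name fields and types, and vmStructs.cpp decides what each entry expands to by passing in the callback macros, which is how the Serviceability Agent obtains its field-offset tables. A toy version of the pattern (made-up struct, macro and field names, not the real HotSpot ones):

// Toy X-macro list: the consumer chooses what each entry becomes by supplying
// the callback macro, just as vmStructs.cpp does for the real OS/CPU lists.
#include <cstddef>
#include <cstdio>

struct OSThreadToy { int _thread_id; unsigned long _pthread_id; };

#define TOY_STRUCTS_OS_CPU(nonstatic_field)        \
  nonstatic_field(OSThreadToy, _thread_id)         \
  nonstatic_field(OSThreadToy, _pthread_id)

#define EMIT_OFFSET_ENTRY(klass, field) { #klass "::" #field, offsetof(klass, field) },

struct Entry { const char* name; size_t offset; };
static const Entry entries[] = { TOY_STRUCTS_OS_CPU(EMIT_OFFSET_ENTRY) };

int main() {
  for (const Entry& e : entries) {
    std::printf("%-26s offset=%zu\n", e.name, e.offset);
  }
  return 0;
}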
++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(OSThread::thread_id_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_RISCV_VM_VMSTRUCTS_LINUX_RISCV_HPP +diff --git a/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +new file mode 100644 +index 000000000..ef9358aa0 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_riscv/vm_version_linux_riscv.cpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "runtime/os.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/vm_version.hpp" ++ ++#include ++#include ++ ++#ifndef HWCAP_ISA_I ++#define HWCAP_ISA_I (1 << ('I' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_M ++#define HWCAP_ISA_M (1 << ('M' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_A ++#define HWCAP_ISA_A (1 << ('A' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_F ++#define HWCAP_ISA_F (1 << ('F' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_D ++#define HWCAP_ISA_D (1 << ('D' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_C ++#define HWCAP_ISA_C (1 << ('C' - 'A')) ++#endif ++ ++#ifndef HWCAP_ISA_V ++#define HWCAP_ISA_V (1 << ('V' - 'A')) ++#endif ++ ++#define read_csr(csr) \ ++({ \ ++ register unsigned long __v; \ ++ __asm__ __volatile__ ("csrr %0, %1" \ ++ : "=r" (__v) \ ++ : "i" (csr) \ ++ : "memory"); \ ++ __v; \ ++}) ++ ++uint32_t VM_Version::get_current_vector_length() { ++ assert(_features & CPU_V, "should not call this"); ++ return (uint32_t)read_csr(CSR_VLENB); ++} ++ ++void VM_Version::get_os_cpu_info() { ++ ++ uint64_t auxv = getauxval(AT_HWCAP); ++ ++ STATIC_ASSERT(CPU_I == HWCAP_ISA_I); ++ STATIC_ASSERT(CPU_M == HWCAP_ISA_M); ++ STATIC_ASSERT(CPU_A == HWCAP_ISA_A); ++ STATIC_ASSERT(CPU_F == HWCAP_ISA_F); ++ STATIC_ASSERT(CPU_D == HWCAP_ISA_D); ++ STATIC_ASSERT(CPU_C == HWCAP_ISA_C); ++ STATIC_ASSERT(CPU_V == HWCAP_ISA_V); ++ ++ if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++ char buf[512], *p; ++ while (fgets(buf, sizeof (buf), f) != NULL) { ++ if ((p = strchr(buf, ':')) != NULL) { ++ if (strncmp(buf, "uarch", sizeof "uarch" - 1) == 0) { ++ char* uarch = os::strdup(p + 2); ++ uarch[strcspn(uarch, "\n")] = '\0'; ++ _uarch = uarch; ++ break; ++ } ++ } ++ } ++ fclose(f); ++ } ++ ++ // RISC-V has four bit-manipulation ISA-extensions: Zba/Zbb/Zbc/Zbs. ++ // Availability for those extensions could not be queried from HWCAP. ++ // TODO: Add proper detection for those extensions. ++ _features = auxv & ( ++ HWCAP_ISA_I | ++ HWCAP_ISA_M | ++ HWCAP_ISA_A | ++ HWCAP_ISA_F | ++ HWCAP_ISA_D | ++ HWCAP_ISA_C | ++ HWCAP_ISA_V); ++} +diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp +index ba61aa4c0..4ca0b050b 100644 +--- a/src/hotspot/share/adlc/archDesc.cpp ++++ b/src/hotspot/share/adlc/archDesc.cpp +@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { + // Match Vector types. + if (strncmp(idealOp, "Vec",3)==0) { + switch(last_char) { ++ case 'A': return "TypeVect::VECTA"; + case 'S': return "TypeVect::VECTS"; + case 'D': return "TypeVect::VECTD"; + case 'X': return "TypeVect::VECTX"; +@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { + } + } + ++ if (strncmp(idealOp, "RegVMask", 8) == 0) { ++ return "Type::BOTTOM"; ++ } ++ + // !!!!! 
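get_os_cpu_info() above derives the base ISA letters directly from AT_HWCAP, with each standard extension encoded as 1 << (letter - 'A'); only the Zba/Zbb/Zbc/Zbs extensions need detection by other means. A small standalone probe using the same encoding — getauxval and AT_HWCAP are real Linux APIs, the helper name is made up, and the bits are only meaningful on a riscv64 kernel:

// Probe sketch: bit ('X' - 'A') of AT_HWCAP set means standard extension 'X'
// is advertised by the kernel, the same encoding the patch relies on.
#include <cstdio>
#include <sys/auxv.h>

static bool has_isa_letter(unsigned long hwcap, char letter) {
  return (hwcap >> (letter - 'A')) & 1UL;
}

int main() {
  unsigned long hwcap = getauxval(AT_HWCAP);
  const char letters[] = {'I', 'M', 'A', 'F', 'D', 'C', 'V'};
  for (char l : letters) {
    std::printf("extension %c: %s\n", l, has_isa_letter(hwcap, l) ? "yes" : "no");
  }
  return 0;
}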
+ switch(last_char) { + case 'I': return "TypeInt::INT"; +diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp +index f810fde76..2cf9636d1 100644 +--- a/src/hotspot/share/adlc/formssel.cpp ++++ b/src/hotspot/share/adlc/formssel.cpp +@@ -3968,6 +3968,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { + strcmp(opType,"RegL")==0 || + strcmp(opType,"RegF")==0 || + strcmp(opType,"RegD")==0 || ++ strcmp(opType,"RegVMask")==0 || ++ strcmp(opType,"VecA")==0 || + strcmp(opType,"VecS")==0 || + strcmp(opType,"VecD")==0 || + strcmp(opType,"VecX")==0 || +diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp +index e30d39f73..af54dddf3 100644 +--- a/src/hotspot/share/c1/c1_LIR.cpp ++++ b/src/hotspot/share/c1/c1_LIR.cpp +@@ -199,7 +199,6 @@ bool LIR_OprDesc::is_oop() const { + void LIR_Op2::verify() const { + #ifdef ASSERT + switch (code()) { +- case lir_cmove: + case lir_xchg: + break; + +@@ -252,30 +251,27 @@ void LIR_Op2::verify() const { + + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _label(block->label()) ++ , _type(type) + , _block(block) + , _ublock(NULL) + , _stub(NULL) { + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub) : +- LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _label(stub->entry()) ++ , _type(type) + , _block(NULL) + , _ublock(NULL) + , _stub(stub) { + } + + LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock) +- : LIR_Op(lir_cond_float_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) +- , _cond(cond) +- , _type(type) ++ : LIR_Op2(lir_cond_float_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) + , _label(block->label()) ++ , _type(type) + , _block(block) + , _ublock(ublock) + , _stub(NULL) +@@ -296,13 +292,13 @@ void LIR_OpBranch::change_ublock(BlockBegin* b) { + } + + void LIR_OpBranch::negate_cond() { +- switch (_cond) { +- case lir_cond_equal: _cond = lir_cond_notEqual; break; +- case lir_cond_notEqual: _cond = lir_cond_equal; break; +- case lir_cond_less: _cond = lir_cond_greaterEqual; break; +- case lir_cond_lessEqual: _cond = lir_cond_greater; break; +- case lir_cond_greaterEqual: _cond = lir_cond_less; break; +- case lir_cond_greater: _cond = lir_cond_lessEqual; break; ++ switch (cond()) { ++ case lir_cond_equal: set_cond(lir_cond_notEqual); break; ++ case lir_cond_notEqual: set_cond(lir_cond_equal); break; ++ case lir_cond_less: set_cond(lir_cond_greaterEqual); break; ++ case lir_cond_lessEqual: set_cond(lir_cond_greater); break; ++ case lir_cond_greaterEqual: set_cond(lir_cond_less); break; ++ case lir_cond_greater: set_cond(lir_cond_lessEqual); break; + default: ShouldNotReachHere(); + } + } +@@ -525,6 +521,13 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + assert(op->as_OpBranch() != NULL, "must be"); + LIR_OpBranch* opBranch = (LIR_OpBranch*)op; + ++ assert(opBranch->_tmp1->is_illegal() && opBranch->_tmp2->is_illegal() && ++ opBranch->_tmp3->is_illegal() && opBranch->_tmp4->is_illegal() && ++ opBranch->_tmp5->is_illegal(), "not used"); ++ ++ if (opBranch->_opr1->is_valid()) 
do_input(opBranch->_opr1); ++ if (opBranch->_opr2->is_valid()) do_input(opBranch->_opr2); ++ + if (opBranch->_info != NULL) do_info(opBranch->_info); + assert(opBranch->_result->is_illegal(), "not used"); + if (opBranch->_stub != NULL) opBranch->stub()->visit(this); +@@ -615,17 +618,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { + // to the result operand, otherwise the backend fails + case lir_cmove: + { +- assert(op->as_Op2() != NULL, "must be"); +- LIR_Op2* op2 = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "must be"); ++ LIR_Op4* op4 = (LIR_Op4*)op; + +- assert(op2->_info == NULL && op2->_tmp1->is_illegal() && op2->_tmp2->is_illegal() && +- op2->_tmp3->is_illegal() && op2->_tmp4->is_illegal() && op2->_tmp5->is_illegal(), "not used"); +- assert(op2->_opr1->is_valid() && op2->_opr2->is_valid() && op2->_result->is_valid(), "used"); ++ assert(op4->_info == NULL && op4->_tmp1->is_illegal() && op4->_tmp2->is_illegal() && ++ op4->_tmp3->is_illegal() && op4->_tmp4->is_illegal() && op4->_tmp5->is_illegal(), "must be"); ++ assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && op4->_result->is_valid(), "used"); + +- do_input(op2->_opr1); +- do_input(op2->_opr2); +- do_temp(op2->_opr2); +- do_output(op2->_result); ++ do_input(op4->_opr1); ++ do_input(op4->_opr2); ++ if (op4->_opr3->is_valid()) do_input(op4->_opr3); ++ if (op4->_opr4->is_valid()) do_input(op4->_opr4); ++ do_temp(op4->_opr2); ++ do_output(op4->_result); + + break; + } +@@ -1048,6 +1053,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { + masm->emit_op3(this); + } + ++void LIR_Op4::emit_code(LIR_Assembler* masm) { ++ masm->emit_op4(this); ++} ++ + void LIR_OpLock::emit_code(LIR_Assembler* masm) { + masm->emit_lock(this); + if (stub()) { +@@ -1084,6 +1093,10 @@ LIR_List::LIR_List(Compilation* compilation, BlockBegin* block) + , _file(NULL) + , _line(0) + #endif ++#ifdef RISCV ++ , _cmp_opr1(LIR_OprFact::illegalOpr) ++ , _cmp_opr2(LIR_OprFact::illegalOpr) ++#endif + { } + + +@@ -1101,6 +1114,38 @@ void LIR_List::set_file_and_line(const char * file, int line) { + } + #endif + ++#ifdef RISCV ++void LIR_List::set_cmp_oprs(LIR_Op* op) { ++ switch (op->code()) { ++ case lir_cmp: ++ _cmp_opr1 = op->as_Op2()->in_opr1(); ++ _cmp_opr2 = op->as_Op2()->in_opr2(); ++ break; ++ case lir_branch: // fall through ++ case lir_cond_float_branch: ++ assert(op->as_OpBranch()->cond() == lir_cond_always || ++ (_cmp_opr1 != LIR_OprFact::illegalOpr && _cmp_opr2 != LIR_OprFact::illegalOpr), ++ "conditional branches must have legal operands"); ++ if (op->as_OpBranch()->cond() != lir_cond_always) { ++ op->as_Op2()->set_in_opr1(_cmp_opr1); ++ op->as_Op2()->set_in_opr2(_cmp_opr2); ++ } ++ break; ++ case lir_cmove: ++ op->as_Op4()->set_in_opr3(_cmp_opr1); ++ op->as_Op4()->set_in_opr4(_cmp_opr2); ++ break; ++#if INCLUDE_ZGC ++ case lir_zloadbarrier_test: ++ _cmp_opr1 = FrameMap::as_opr(t1); ++ _cmp_opr2 = LIR_OprFact::intConst(0); ++ break; ++#endif ++ default: ++ break; ++ } ++} ++#endif + + void LIR_List::append(LIR_InsertionBuffer* buffer) { + assert(this == buffer->lir_list(), "wrong lir list"); +@@ -1680,7 +1725,6 @@ const char * LIR_Op::name() const { + case lir_cmp_l2i: s = "cmp_l2i"; break; + case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; + case lir_cmp_fd2i: s = "comp_fd2i"; break; +- case lir_cmove: s = "cmove"; break; + case lir_add: s = "add"; break; + case lir_sub: s = "sub"; break; + case lir_mul: s = "mul"; break; +@@ -1705,6 +1749,8 @@ const char * LIR_Op::name() const { + case lir_irem: s = "irem"; break; + case lir_fmad: s = "fmad"; break; 
+ case lir_fmaf: s = "fmaf"; break; ++ // LIR_Op4 ++ case lir_cmove: s = "cmove"; break; + // LIR_OpJavaCall + case lir_static_call: s = "static"; break; + case lir_optvirtual_call: s = "optvirtual"; break; +@@ -1841,6 +1887,8 @@ void LIR_Op1::print_patch_code(outputStream* out, LIR_PatchCode code) { + // LIR_OpBranch + void LIR_OpBranch::print_instr(outputStream* out) const { + print_condition(out, cond()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if (stub() != NULL) { +@@ -1927,7 +1975,7 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { + + // LIR_Op2 + void LIR_Op2::print_instr(outputStream* out) const { +- if (code() == lir_cmove || code() == lir_cmp) { ++ if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); +@@ -1978,6 +2026,15 @@ void LIR_Op3::print_instr(outputStream* out) const { + result_opr()->print(out); + } + ++// LIR_Op4 ++void LIR_Op4::print_instr(outputStream* out) const { ++ print_condition(out, condition()); out->print(" "); ++ in_opr1()->print(out); out->print(" "); ++ in_opr2()->print(out); out->print(" "); ++ in_opr3()->print(out); out->print(" "); ++ in_opr4()->print(out); out->print(" "); ++ result_opr()->print(out); ++} + + void LIR_OpLock::print_instr(outputStream* out) const { + hdr_opr()->print(out); out->print(" "); +diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp +index 3234ca018..88cd3b24e 100644 +--- a/src/hotspot/share/c1/c1_LIR.hpp ++++ b/src/hotspot/share/c1/c1_LIR.hpp +@@ -864,9 +864,11 @@ class LIR_OpConvert; + class LIR_OpAllocObj; + class LIR_OpRoundFP; + class LIR_Op2; +-class LIR_OpDelay; ++class LIR_OpBranch; ++class LIR_OpDelay; + class LIR_Op3; + class LIR_OpAllocArray; ++class LIR_Op4; + class LIR_OpCall; + class LIR_OpJavaCall; + class LIR_OpRTCall; +@@ -916,8 +918,6 @@ enum LIR_Code { + , lir_null_check + , lir_return + , lir_leal +- , lir_branch +- , lir_cond_float_branch + , lir_move + , lir_convert + , lir_alloc_object +@@ -929,11 +929,12 @@ enum LIR_Code { + , lir_unwind + , end_op1 + , begin_op2 ++ , lir_branch ++ , lir_cond_float_branch + , lir_cmp + , lir_cmp_l2i + , lir_ucmp_fd2i + , lir_cmp_fd2i +- , lir_cmove + , lir_add + , lir_sub + , lir_mul +@@ -964,6 +965,9 @@ enum LIR_Code { + , lir_fmad + , lir_fmaf + , end_op3 ++ , begin_op4 ++ , lir_cmove ++ , end_op4 + , begin_opJavaCall + , lir_static_call + , lir_optvirtual_call +@@ -1134,6 +1138,7 @@ class LIR_Op: public CompilationResourceObj { + virtual LIR_Op1* as_Op1() { return NULL; } + virtual LIR_Op2* as_Op2() { return NULL; } + virtual LIR_Op3* as_Op3() { return NULL; } ++ virtual LIR_Op4* as_Op4() { return NULL; } + virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } + virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } + virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } +@@ -1410,51 +1415,6 @@ class LIR_OpRTCall: public LIR_OpCall { + virtual void verify() const; + }; + +- +-class LIR_OpBranch: public LIR_Op { +- friend class LIR_OpVisitState; +- +- private: +- LIR_Condition _cond; +- BasicType _type; +- Label* _label; +- BlockBegin* _block; // if this is a branch to a block, this is the block +- BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block +- CodeStub* _stub; // if this is a branch to a stub, this is the stub 
+- +- public: +- LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) +- : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) +- , _cond(cond) +- , _type(type) +- , _label(lbl) +- , _block(NULL) +- , _ublock(NULL) +- , _stub(NULL) { } +- +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); +- LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); +- +- // for unordered comparisons +- LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); +- +- LIR_Condition cond() const { return _cond; } +- BasicType type() const { return _type; } +- Label* label() const { return _label; } +- BlockBegin* block() const { return _block; } +- BlockBegin* ublock() const { return _ublock; } +- CodeStub* stub() const { return _stub; } +- +- void change_block(BlockBegin* b); +- void change_ublock(BlockBegin* b); +- void negate_cond(); +- +- virtual void emit_code(LIR_Assembler* masm); +- virtual LIR_OpBranch* as_OpBranch() { return this; } +- virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +-}; +- +- + class ConversionStub; + + class LIR_OpConvert: public LIR_Op1 { +@@ -1614,19 +1574,19 @@ class LIR_Op2: public LIR_Op { + void verify() const; + + public: +- LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL) ++ LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, CodeEmitInfo* info = NULL, BasicType type = T_ILLEGAL) + : LIR_Op(code, LIR_OprFact::illegalOpr, info) + , _opr1(opr1) + , _opr2(opr2) +- , _type(T_ILLEGAL) +- , _condition(condition) ++ , _type(type) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code == lir_cmp || code == lir_assert, "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(condition) { ++ assert(code == lir_cmp || code == lir_branch || code == lir_cond_float_branch || code == lir_assert, "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) +@@ -1634,7 +1594,6 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(type) +- , _condition(condition) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) +@@ -1651,14 +1610,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(type) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(LIR_OprFact::illegalOpr) + , _tmp2(LIR_OprFact::illegalOpr) + , _tmp3(LIR_OprFact::illegalOpr) + , _tmp4(LIR_OprFact::illegalOpr) +- , _tmp5(LIR_OprFact::illegalOpr) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(LIR_OprFact::illegalOpr) ++ , _condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, +@@ -1667,14 +1626,14 @@ class LIR_Op2: public LIR_Op { + , _opr1(opr1) + , _opr2(opr2) + , _type(T_ILLEGAL) +- , _condition(lir_cond_unknown) + , _fpu_stack_size(0) + , _tmp1(tmp1) + , _tmp2(tmp2) + , _tmp3(tmp3) + , _tmp4(tmp4) +- , _tmp5(tmp5) { +- assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); ++ , _tmp5(tmp5) ++ , 
_condition(lir_cond_unknown) { ++ assert(code != lir_cmp && code != lir_branch && code != lir_cond_float_branch && is_in_range(code, begin_op2, end_op2), "code check"); + } + + LIR_Opr in_opr1() const { return _opr1; } +@@ -1686,10 +1645,10 @@ class LIR_Op2: public LIR_Op { + LIR_Opr tmp4_opr() const { return _tmp4; } + LIR_Opr tmp5_opr() const { return _tmp5; } + LIR_Condition condition() const { +- assert(code() == lir_cmp || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch || code() == lir_assert, "only valid for branch and assert"); return _condition; + } + void set_condition(LIR_Condition condition) { +- assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; ++ assert(code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch, "only valid for branch"); _condition = condition; + } + + void set_fpu_stack_size(int size) { _fpu_stack_size = size; } +@@ -1703,6 +1662,53 @@ class LIR_Op2: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_OpBranch: public LIR_Op2 { ++ friend class LIR_OpVisitState; ++ ++ private: ++ BasicType _type; ++ Label* _label; ++ BlockBegin* _block; // if this is a branch to a block, this is the block ++ BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block ++ CodeStub* _stub; // if this is a branch to a stub, this is the stub ++ ++ public: ++ LIR_OpBranch(LIR_Condition cond, BasicType type, Label* lbl) ++ : LIR_Op2(lir_branch, cond, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, (CodeEmitInfo*) NULL) ++ , _label(lbl) ++ , _type(type) ++ , _block(NULL) ++ , _ublock(NULL) ++ , _stub(NULL) { } ++ ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block); ++ LIR_OpBranch(LIR_Condition cond, BasicType type, CodeStub* stub); ++ ++ // for unordered comparisons ++ LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block, BlockBegin* ublock); ++ ++ LIR_Condition cond() const { ++ return condition(); ++ } ++ ++ void set_cond(LIR_Condition cond) { ++ set_condition(cond); ++ } ++ ++ Label* label() const { return _label; } ++ BlockBegin* block() const { return _block; } ++ BlockBegin* ublock() const { return _ublock; } ++ CodeStub* stub() const { return _stub; } ++ ++ void change_block(BlockBegin* b); ++ void change_ublock(BlockBegin* b); ++ void negate_cond(); ++ ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_OpBranch* as_OpBranch() { return this; } ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; ++ + class LIR_OpAllocArray : public LIR_Op { + friend class LIR_OpVisitState; + +@@ -1766,6 +1772,63 @@ class LIR_Op3: public LIR_Op { + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; + }; + ++class LIR_Op4: public LIR_Op { ++ friend class LIR_OpVisitState; ++ protected: ++ LIR_Opr _opr1; ++ LIR_Opr _opr2; ++ LIR_Opr _opr3; ++ LIR_Opr _opr4; ++ BasicType _type; ++ LIR_Opr _tmp1; ++ LIR_Opr _tmp2; ++ LIR_Opr _tmp3; ++ LIR_Opr _tmp4; ++ LIR_Opr _tmp5; ++ LIR_Condition _condition; ++ ++ public: ++ LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, ++ LIR_Opr result, BasicType type) ++ : LIR_Op(code, result, NULL) ++ , _opr1(opr1) ++ , _opr2(opr2) ++ , _opr3(opr3) ++ , _opr4(opr4) ++ , _type(type) ++ , _condition(condition) ++ , _tmp1(LIR_OprFact::illegalOpr) ++ , 
_tmp2(LIR_OprFact::illegalOpr) ++ , _tmp3(LIR_OprFact::illegalOpr) ++ , _tmp4(LIR_OprFact::illegalOpr) ++ , _tmp5(LIR_OprFact::illegalOpr) { ++ assert(code == lir_cmove, "code check"); ++ assert(type != T_ILLEGAL, "cmove should have type"); ++ } ++ ++ LIR_Opr in_opr1() const { return _opr1; } ++ LIR_Opr in_opr2() const { return _opr2; } ++ LIR_Opr in_opr3() const { return _opr3; } ++ LIR_Opr in_opr4() const { return _opr4; } ++ BasicType type() const { return _type; } ++ LIR_Opr tmp1_opr() const { return _tmp1; } ++ LIR_Opr tmp2_opr() const { return _tmp2; } ++ LIR_Opr tmp3_opr() const { return _tmp3; } ++ LIR_Opr tmp4_opr() const { return _tmp4; } ++ LIR_Opr tmp5_opr() const { return _tmp5; } ++ ++ LIR_Condition condition() const { return _condition; } ++ void set_condition(LIR_Condition condition) { _condition = condition; } ++ ++ void set_in_opr1(LIR_Opr opr) { _opr1 = opr; } ++ void set_in_opr2(LIR_Opr opr) { _opr2 = opr; } ++ void set_in_opr3(LIR_Opr opr) { _opr3 = opr; } ++ void set_in_opr4(LIR_Opr opr) { _opr4 = opr; } ++ virtual void emit_code(LIR_Assembler* masm); ++ virtual LIR_Op4* as_Op4() { return this; } ++ ++ virtual void print_instr(outputStream* out) const PRODUCT_RETURN; ++}; + + //-------------------------------- + class LabelObj: public CompilationResourceObj { +@@ -1988,6 +2051,10 @@ class LIR_List: public CompilationResourceObj { + const char * _file; + int _line; + #endif ++#ifdef RISCV ++ LIR_Opr _cmp_opr1; ++ LIR_Opr _cmp_opr2; ++#endif + + public: + void append(LIR_Op* op) { +@@ -2000,6 +2067,12 @@ class LIR_List: public CompilationResourceObj { + } + #endif // PRODUCT + ++#ifdef RISCV ++ set_cmp_oprs(op); ++ // lir_cmp set cmp oprs only on riscv ++ if (op->code() == lir_cmp) return; ++#endif ++ + _operations.append(op); + + #ifdef ASSERT +@@ -2016,6 +2089,10 @@ class LIR_List: public CompilationResourceObj { + void set_file_and_line(const char * file, int line); + #endif + ++#ifdef RISCV ++ void set_cmp_oprs(LIR_Op* op); ++#endif ++ + //---------- accessors --------------- + LIR_OpList* instructions_list() { return &_operations; } + int length() const { return _operations.length(); } +@@ -2149,8 +2226,9 @@ class LIR_List: public CompilationResourceObj { + void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); + void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info); + +- void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { +- append(new LIR_Op2(lir_cmove, condition, src1, src2, dst, type)); ++ void cmove(LIR_Condition condition, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr) { ++ append(new LIR_Op4(lir_cmove, condition, src1, src2, cmp_opr1, cmp_opr2, dst, type)); + } + + void cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp +index 160483d5f..42a0350f7 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp +@@ -709,10 +709,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + comp_fl2i(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); + break; + +- case lir_cmove: +- cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type()); +- break; +- + case lir_shl: + case lir_shr: + case lir_ushr: +@@ -776,6 +772,17 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { + } 
+ } + ++void LIR_Assembler::emit_op4(LIR_Op4* op) { ++ switch(op->code()) { ++ case lir_cmove: ++ cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->result_opr(), op->type(), op->in_opr3(), op->in_opr4()); ++ break; ++ ++ default: ++ Unimplemented(); ++ break; ++ } ++} + + void LIR_Assembler::build_frame() { + _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); +diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp +index 44a5bcbe5..406a58d21 100644 +--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp ++++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp +@@ -190,6 +190,7 @@ class LIR_Assembler: public CompilationResourceObj { + void emit_op1(LIR_Op1* op); + void emit_op2(LIR_Op2* op); + void emit_op3(LIR_Op3* op); ++ void emit_op4(LIR_Op4* op); + void emit_opBranch(LIR_OpBranch* op); + void emit_opLabel(LIR_OpLabel* op); + void emit_arraycopy(LIR_OpArrayCopy* op); +@@ -222,7 +223,8 @@ class LIR_Assembler: public CompilationResourceObj { + void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); + void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions + void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); +- void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); ++ void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type, ++ LIR_Opr cmp_opr1 = LIR_OprFact::illegalOpr, LIR_Opr cmp_opr2 = LIR_OprFact::illegalOpr); + + void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); + void ic_call( LIR_OpJavaCall* op); +diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp +index c28055fd9..d00bfe91a 100644 +--- a/src/hotspot/share/c1/c1_LinearScan.cpp ++++ b/src/hotspot/share/c1/c1_LinearScan.cpp +@@ -1242,8 +1242,8 @@ void LinearScan::add_register_hints(LIR_Op* op) { + break; + } + case lir_cmove: { +- assert(op->as_Op2() != NULL, "lir_cmove must be LIR_Op2"); +- LIR_Op2* cmove = (LIR_Op2*)op; ++ assert(op->as_Op4() != NULL, "lir_cmove must be LIR_Op4"); ++ LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr1(); + LIR_Opr move_to = cmove->result_opr(); +@@ -3140,6 +3140,9 @@ void LinearScan::do_linear_scan() { + } + } + ++#ifndef RISCV ++ // Disable these optimizations on riscv temporarily, because it does not ++ // work when the comparison operands are bound to branches or cmoves. + { TIME_LINEAR_SCAN(timer_optimize_lir); + + EdgeMoveOptimizer::optimize(ir()->code()); +@@ -3147,6 +3150,7 @@ void LinearScan::do_linear_scan() { + // check that cfg is still correct after optimizations + ir()->verify(); + } ++#endif + + NOT_PRODUCT(print_lir(1, "Before Code Generation", false)); + NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_final)); +@@ -6284,14 +6288,14 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { + // There might be a cmove inserted for profiling which depends on the same + // compare. If we change the condition of the respective compare, we have + // to take care of this cmove as well. 
+- LIR_Op2* prev_cmove = NULL; ++ LIR_Op4* prev_cmove = NULL; + + for(int j = instructions->length() - 3; j >= 0 && prev_cmp == NULL; j--) { + prev_op = instructions->at(j); + // check for the cmove + if (prev_op->code() == lir_cmove) { +- assert(prev_op->as_Op2() != NULL, "cmove must be of type LIR_Op2"); +- prev_cmove = (LIR_Op2*)prev_op; ++ assert(prev_op->as_Op4() != NULL, "cmove must be of type LIR_Op4"); ++ prev_cmove = (LIR_Op4*)prev_op; + assert(prev_branch->cond() == prev_cmove->condition(), "should be the same"); + } + if (prev_op->code() == lir_cmp) { +diff --git a/src/hotspot/share/classfile/vmSymbols.cpp b/src/hotspot/share/classfile/vmSymbols.cpp +index 19fe196bc..d9cb8e999 100644 +--- a/src/hotspot/share/classfile/vmSymbols.cpp ++++ b/src/hotspot/share/classfile/vmSymbols.cpp +@@ -523,6 +523,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { + case vmIntrinsics::_indexOfIU: + case vmIntrinsics::_indexOfIUL: + case vmIntrinsics::_indexOfU_char: ++ case vmIntrinsics::_indexOfL_char: + case vmIntrinsics::_compareToL: + case vmIntrinsics::_compareToU: + case vmIntrinsics::_compareToLU: +@@ -808,6 +809,7 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) { + case vmIntrinsics::_indexOfIU: + case vmIntrinsics::_indexOfIUL: + case vmIntrinsics::_indexOfU_char: ++ case vmIntrinsics::_indexOfL_char: + if (!SpecialStringIndexOf) return true; + break; + case vmIntrinsics::_equalsL: +diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp +index cef3f530c..a31525003 100644 +--- a/src/hotspot/share/classfile/vmSymbols.hpp ++++ b/src/hotspot/share/classfile/vmSymbols.hpp +@@ -946,6 +946,7 @@ + do_intrinsic(_indexOfIU, java_lang_StringUTF16, indexOf_name, indexOfI_signature, F_S) \ + do_intrinsic(_indexOfIUL, java_lang_StringUTF16, indexOfUL_name, indexOfI_signature, F_S) \ + do_intrinsic(_indexOfU_char, java_lang_StringUTF16, indexOfChar_name, indexOfChar_signature, F_S) \ ++ do_intrinsic(_indexOfL_char, java_lang_StringLatin1,indexOfChar_name, indexOfChar_signature, F_S) \ + do_name( indexOf_name, "indexOf") \ + do_name( indexOfChar_name, "indexOfChar") \ + do_name( indexOfUL_name, "indexOfLatin1") \ +diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +index 4771a8b86..295f82ccc 100644 +--- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp ++++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +@@ -31,7 +31,7 @@ + #include "utilities/defaultStream.hpp" + + void ShenandoahArguments::initialize() { +-#if !(defined AARCH64 || defined AMD64 || defined IA32) ++#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined RISCV64) + vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); + #endif + +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a5..ff16de0e7 100644 +--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { + inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { + #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) + return true; +-#elif defined(SPARC) || defined(ARM) || defined(AARCH64) ++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(RISCV) + return false; + #else + #warning "Unconfigured platform" +diff --git 
a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp +index 7768615b7..ef006f087 100644 +--- a/src/hotspot/share/opto/c2compiler.cpp ++++ b/src/hotspot/share/opto/c2compiler.cpp +@@ -510,6 +510,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt + case vmIntrinsics::_indexOfIU: + case vmIntrinsics::_indexOfIUL: + case vmIntrinsics::_indexOfU_char: ++ case vmIntrinsics::_indexOfL_char: + case vmIntrinsics::_toBytesStringU: + case vmIntrinsics::_getCharsStringU: + case vmIntrinsics::_getCharStringU: +diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp +index 500054218..fafbde78d 100644 +--- a/src/hotspot/share/opto/chaitin.cpp ++++ b/src/hotspot/share/opto/chaitin.cpp +@@ -77,6 +77,7 @@ void LRG::dump() const { + if( _is_oop ) tty->print("Oop "); + if( _is_float ) tty->print("Float "); + if( _is_vector ) tty->print("Vector "); ++ if( _is_scalable ) tty->print("Scalable "); + if( _was_spilled1 ) tty->print("Spilled "); + if( _was_spilled2 ) tty->print("Spilled2 "); + if( _direct_conflict ) tty->print("Direct_conflict "); +@@ -591,6 +592,7 @@ void PhaseChaitin::Register_Allocate() { + + // Merge multidefs if multiple defs representing the same value are used in a single block. + merge_multidefs(); ++ merge_debugdefs(); + + #ifdef ASSERT + // Veify the graph after RA. +@@ -646,7 +648,15 @@ void PhaseChaitin::Register_Allocate() { + // Live ranges record the highest register in their mask. + // We want the low register for the AD file writer's convenience. + OptoReg::Name hi = lrg.reg(); // Get hi register +- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo ++ int num_regs = lrg.num_regs(); ++ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { ++ // For scalable vector registers, when they are allocated in physical ++ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable ++ // vector. If they are allocated on stack, we need to get the actual ++ // num_regs, which reflects the physical length of scalable registers. ++ num_regs = lrg.scalable_reg_slots(); ++ } ++ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo + // We have to use pair [lo,lo+1] even for wide vectors because + // the rest of code generation works only with pairs. It is safe + // since for registers encoding only 'lo' is used. +@@ -801,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { + // Check for vector live range (only if vector register is used). + // On SPARC vector uses RegD which could be misaligned so it is not + // processes as vector in RA. +- if (RegMask::is_vector(ireg)) ++ if (RegMask::is_vector(ireg)) { + lrg._is_vector = 1; ++ if (ireg == Op_VecA) { ++ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); ++ lrg._is_scalable = 1; ++ // For scalable vector, when it is allocated in physical register, ++ // num_regs is RegMask::SlotsPerVecA for reg mask, ++ // which may not be the actual physical register size. ++ // If it is allocated in stack, we need to get the actual ++ // physical length of scalable vector register. 
++ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); ++ } ++ } + assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, + "vector must be in vector registers"); + +@@ -912,6 +933,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { + lrg.set_reg_pressure(1); + #endif + break; ++ case Op_VecA: ++ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); ++ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); ++ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); ++ lrg.set_num_regs(RegMask::SlotsPerVecA); ++ lrg.set_reg_pressure(1); ++ break; + case Op_VecS: + assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); + assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); +@@ -1358,6 +1386,47 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { + return false; + } + ++static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { ++ int num_regs = lrg.num_regs(); ++ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); ++ ++ if (lrg.is_scalable()) { ++ // a physical register is found ++ if (chunk == 0 && OptoReg::is_reg(assigned)) { ++ return assigned; ++ } ++ ++ // find available stack slots for scalable register ++ if (lrg._is_vector) { ++ num_regs = lrg.scalable_reg_slots(); ++ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits ++ if (num_regs == RegMask::SlotsPerVecA) { ++ return assigned; ++ } ++ ++ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it ++ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits ++ // instead of SlotsPerVecA bits. ++ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg ++ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { ++ // Verify the found reg has scalable_reg_slots() bits set. ++ if (mask.is_valid_reg(assigned, num_regs)) { ++ return assigned; ++ } else { ++ // Remove more for each iteration ++ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg ++ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits ++ assigned = mask.find_first_set(lrg, num_regs); ++ } ++ } ++ return OptoReg::Bad; // will cause chunk change, and retry next chunk ++ } ++ } ++ ++ return assigned; ++} ++ ++ + // Choose a color using the biasing heuristic + OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + +@@ -1391,7 +1460,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + RegMask tempmask = lrg.mask(); + tempmask.AND(lrgs(copy_lrg).mask()); + tempmask.clear_to_sets(lrg.num_regs()); +- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); ++ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); + if (OptoReg::is_valid(reg)) + return reg; + } +@@ -1400,7 +1469,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + // If no bias info exists, just go with the register selection ordering + if (lrg._is_vector || lrg.num_regs() == 2) { + // Find an aligned set +- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); ++ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); + } + + // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate +@@ -1564,12 +1633,21 @@ uint PhaseChaitin::Select( ) { + int n_regs = lrg->num_regs(); + assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); + if (n_regs == 1 || !lrg->_fat_proj) { +- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); ++ if (Matcher::supports_scalable_vector()) { ++ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); ++ } else { ++ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); ++ } + lrg->Clear(); // Clear the mask + lrg->Insert(reg); // Set regmask to match selected reg + // For vectors and pairs, also insert the low bit of the pair +- for (int i = 1; i < n_regs; i++) ++ // We always choose the high bit, then mask the low bits by register size ++ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack ++ n_regs = lrg->scalable_reg_slots(); ++ } ++ for (int i = 1; i < n_regs; i++) { + lrg->Insert(OptoReg::add(reg,-i)); ++ } + lrg->set_mask_size(n_regs); + } else { // Else fatproj + // mask must be equal to fatproj bits, by definition +diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp +index e5be5b966..b5d1b0604 100644 +--- a/src/hotspot/share/opto/chaitin.hpp ++++ b/src/hotspot/share/opto/chaitin.hpp +@@ -115,9 +115,11 @@ public: + _msize_valid=1; + if (_is_vector) { + assert(!_fat_proj, "sanity"); +- _mask.verify_sets(_num_regs); ++ if (!(_is_scalable && OptoReg::is_stack(_reg))) { ++ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); ++ } + } else if (_num_regs == 2 && !_fat_proj) { +- _mask.verify_pairs(); ++ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); + } + #endif + } +@@ -143,10 +145,34 @@ public: + private: + uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else + // except _num_regs is kill count for fat_proj ++ ++ // For scalable register, num_regs may not be the actual physical register size. ++ // We need to get the actual physical length of scalable register when scalable ++ // register is spilled. The size of one slot is 32-bit. ++ uint _scalable_reg_slots; // Actual scalable register length of slots. ++ // Meaningful only when _is_scalable is true. + public: + int num_regs() const { return _num_regs; } + void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } + ++ uint scalable_reg_slots() { return _scalable_reg_slots; } ++ void set_scalable_reg_slots(uint slots) { ++ assert(_is_scalable, "scalable register"); ++ assert(slots > 0, "slots of scalable register is not valid"); ++ _scalable_reg_slots = slots; ++ } ++ ++ bool is_scalable() { ++#ifdef ASSERT ++ if (_is_scalable) { ++ // Should only be a vector for now, but it could also be a RegVMask in future. ++ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); ++ } ++#endif ++ return _is_scalable; ++ } ++ ++ + private: + // Number of physical registers this live range uses when it colors + // Architecture and register-set dependent +@@ -172,6 +198,7 @@ public: + uint _is_oop:1, // Live-range holds an oop + _is_float:1, // True if in float registers + _is_vector:1, // True if in vector registers ++ _is_scalable:1, // True if register size is scalable + _was_spilled1:1, // True if prior spilling on def + _was_spilled2:1, // True if twice prior spilling on def + _is_bound:1, // live range starts life with no +@@ -756,6 +783,7 @@ private: + + // Merge nodes that are a part of a multidef lrg and produce the same value within a block. 
+ void merge_multidefs(); ++ void merge_debugdefs(); + + private: + +diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp +index c0dfe1b0c..2d9526a39 100644 +--- a/src/hotspot/share/opto/intrinsicnode.hpp ++++ b/src/hotspot/share/opto/intrinsicnode.hpp +@@ -47,10 +47,11 @@ class PartialSubtypeCheckNode : public Node { + // Base class for Ideal nodes used in String intrinsic code. + class StrIntrinsicNode: public Node { + public: +- // Possible encodings of the two parameters passed to the string intrinsic. ++ // Possible encodings of the parameters passed to the string intrinsic. + // 'L' stands for Latin1 and 'U' stands for UTF16. For example, 'LU' means that + // the first string is Latin1 encoded and the second string is UTF16 encoded. +- typedef enum ArgEncoding { LL, LU, UL, UU, none } ArgEnc; ++ // 'L' means that the single string is Latin1 encoded ++ typedef enum ArgEncoding { LL, LU, UL, UU, L, U, none } ArgEnc; + + protected: + // Encoding of strings. Used to select the right version of the intrinsic. +diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp +index 6b6aa9e9b..8719c5b12 100644 +--- a/src/hotspot/share/opto/library_call.cpp ++++ b/src/hotspot/share/opto/library_call.cpp +@@ -217,7 +217,7 @@ class LibraryCallKit : public GraphKit { + bool inline_string_indexOfI(StrIntrinsicNode::ArgEnc ae); + Node* make_indexOf_node(Node* src_start, Node* src_count, Node* tgt_start, Node* tgt_count, + RegionNode* region, Node* phi, StrIntrinsicNode::ArgEnc ae); +- bool inline_string_indexOfChar(); ++ bool inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae); + bool inline_string_equals(StrIntrinsicNode::ArgEnc ae); + bool inline_string_toBytesU(); + bool inline_string_getCharsU(); +@@ -590,7 +590,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { + case vmIntrinsics::_indexOfIL: return inline_string_indexOfI(StrIntrinsicNode::LL); + case vmIntrinsics::_indexOfIU: return inline_string_indexOfI(StrIntrinsicNode::UU); + case vmIntrinsics::_indexOfIUL: return inline_string_indexOfI(StrIntrinsicNode::UL); +- case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(); ++ case vmIntrinsics::_indexOfU_char: return inline_string_indexOfChar(StrIntrinsicNode::U); ++ case vmIntrinsics::_indexOfL_char: return inline_string_indexOfChar(StrIntrinsicNode::L); + + case vmIntrinsics::_equalsL: return inline_string_equals(StrIntrinsicNode::LL); + case vmIntrinsics::_equalsU: return inline_string_equals(StrIntrinsicNode::UU); +@@ -1419,7 +1420,7 @@ Node* LibraryCallKit::make_indexOf_node(Node* src_start, Node* src_count, Node* + } + + //-----------------------------inline_string_indexOfChar----------------------- +-bool LibraryCallKit::inline_string_indexOfChar() { ++bool LibraryCallKit::inline_string_indexOfChar(StrIntrinsicNode::ArgEnc ae) { + if (too_many_traps(Deoptimization::Reason_intrinsic)) { + return false; + } +@@ -1434,12 +1435,12 @@ bool LibraryCallKit::inline_string_indexOfChar() { + + src = must_be_not_null(src, true); + +- Node* src_offset = _gvn.transform(new LShiftINode(from_index, intcon(1))); ++ Node* src_offset = ae == StrIntrinsicNode::L ? 
from_index : _gvn.transform(new LShiftINode(from_index, intcon(1))); + Node* src_start = array_element_address(src, src_offset, T_BYTE); + Node* src_count = _gvn.transform(new SubINode(max, from_index)); + + // Range checks +- generate_string_range_check(src, src_offset, src_count, true); ++ generate_string_range_check(src, src_offset, src_count, ae == StrIntrinsicNode::U); + if (stopped()) { + return true; + } +@@ -1447,7 +1448,7 @@ bool LibraryCallKit::inline_string_indexOfChar() { + RegionNode* region = new RegionNode(3); + Node* phi = new PhiNode(region, TypeInt::INT); + +- Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, StrIntrinsicNode::none); ++ Node* result = new StrIndexOfCharNode(control(), memory(TypeAryPtr::BYTES), src_start, src_count, tgt, ae); + C->set_has_split_ifs(true); // Has chance for split-if optimization + _gvn.transform(result); + +diff --git a/src/hotspot/share/opto/machnode.cpp b/src/hotspot/share/opto/machnode.cpp +index 8d526b15d..92b4f7158 100644 +--- a/src/hotspot/share/opto/machnode.cpp ++++ b/src/hotspot/share/opto/machnode.cpp +@@ -147,7 +147,7 @@ uint MachNode::size(PhaseRegAlloc *ra_) const { + return MachNode::emit_size(ra_); + } + +-//------------------------------size------------------------------------------- ++//-------------------------emit_size------------------------------------------- + // Helper function that computes size by emitting code + uint MachNode::emit_size(PhaseRegAlloc *ra_) const { + // Emit into a trash buffer and count bytes emitted. +diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp +index a52325680..dad70565b 100644 +--- a/src/hotspot/share/opto/machnode.hpp ++++ b/src/hotspot/share/opto/machnode.hpp +@@ -334,6 +334,10 @@ public: + // Top-level ideal Opcode matched + virtual int ideal_Opcode() const { return Op_Node; } + ++ virtual bool is_Opcode_equal(Node* node) { ++ return node->is_Mach() && (ideal_Opcode() == node->as_Mach()->ideal_Opcode()); ++ } ++ + // Adds the label for the case + virtual void add_case_label( int switch_val, Label* blockLabel); + +diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp +index 9e9b3383f..97de5e314 100644 +--- a/src/hotspot/share/opto/matcher.cpp ++++ b/src/hotspot/share/opto/matcher.cpp +@@ -84,6 +84,7 @@ Matcher::Matcher() + idealreg2spillmask [Op_RegF] = NULL; + idealreg2spillmask [Op_RegD] = NULL; + idealreg2spillmask [Op_RegP] = NULL; ++ idealreg2spillmask [Op_VecA] = NULL; + idealreg2spillmask [Op_VecS] = NULL; + idealreg2spillmask [Op_VecD] = NULL; + idealreg2spillmask [Op_VecX] = NULL; +@@ -110,6 +111,7 @@ Matcher::Matcher() + idealreg2mhdebugmask[Op_RegF] = NULL; + idealreg2mhdebugmask[Op_RegD] = NULL; + idealreg2mhdebugmask[Op_RegP] = NULL; ++ idealreg2mhdebugmask[Op_VecA] = NULL; + idealreg2mhdebugmask[Op_VecS] = NULL; + idealreg2mhdebugmask[Op_VecD] = NULL; + idealreg2mhdebugmask[Op_VecX] = NULL; +@@ -424,7 +426,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { + void Matcher::init_first_stack_mask() { + + // Allocate storage for spill masks as masks for the appropriate load type. 
+- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); ++ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+6)); + + idealreg2spillmask [Op_RegN] = &rms[0]; + idealreg2spillmask [Op_RegI] = &rms[1]; +@@ -447,11 +449,12 @@ void Matcher::init_first_stack_mask() { + idealreg2mhdebugmask[Op_RegD] = &rms[16]; + idealreg2mhdebugmask[Op_RegP] = &rms[17]; + +- idealreg2spillmask [Op_VecS] = &rms[18]; +- idealreg2spillmask [Op_VecD] = &rms[19]; +- idealreg2spillmask [Op_VecX] = &rms[20]; +- idealreg2spillmask [Op_VecY] = &rms[21]; +- idealreg2spillmask [Op_VecZ] = &rms[22]; ++ idealreg2spillmask [Op_VecA] = &rms[18]; ++ idealreg2spillmask [Op_VecS] = &rms[19]; ++ idealreg2spillmask [Op_VecD] = &rms[20]; ++ idealreg2spillmask [Op_VecX] = &rms[21]; ++ idealreg2spillmask [Op_VecY] = &rms[22]; ++ idealreg2spillmask [Op_VecZ] = &rms[23]; + + OptoReg::Name i; + +@@ -478,6 +481,7 @@ void Matcher::init_first_stack_mask() { + // Keep spill masks aligned. + aligned_stack_mask.clear_to_pairs(); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); ++ RegMask scalable_stack_mask = aligned_stack_mask; + + *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; + #ifdef _LP64 +@@ -548,6 +552,26 @@ void Matcher::init_first_stack_mask() { + *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; + idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); + } ++ ++ if (Matcher::supports_scalable_vector()) { ++ int k = 1; ++ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); ++ // Exclude last input arg stack slots to avoid spilling vector register there, ++ // otherwise vector spills could stomp over stack slots in caller frame. ++ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { ++ scalable_stack_mask.Remove(in); ++ in = OptoReg::add(in, -1); ++ } ++ ++ // For VecA ++ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); ++ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); ++ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; ++ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); ++ } else { ++ *idealreg2spillmask[Op_VecA] = RegMask::Empty; ++ } ++ + if (UseFPUForSpilling) { + // This mask logic assumes that the spill operations are + // symmetric and that the registers involved are the same size. +@@ -872,6 +896,11 @@ void Matcher::init_spill_mask( Node *ret ) { + idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); + + // Vector regmasks. ++ if (Matcher::supports_scalable_vector()) { ++ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));; ++ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); ++ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); ++ } + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE, 4); + MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); +diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp +index 244e3d1f8..9a8307102 100644 +--- a/src/hotspot/share/opto/matcher.hpp ++++ b/src/hotspot/share/opto/matcher.hpp +@@ -310,7 +310,7 @@ public: + + // identify extra cases that we might want to provide match rules for + // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen +- static const bool match_rule_supported_vector(int opcode, int vlen); ++ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); + + // Some microarchitectures have mask registers used on vectors + static const bool has_predicated_vectors(void); +@@ -333,6 +333,10 @@ public: + Matcher::min_vector_size(bt) <= size); + } + ++ static const bool supports_scalable_vector(); ++ // Actual max scalable vector register length. ++ static const int scalable_vector_reg_size(const BasicType bt); ++ + // Vector ideal reg + static const uint vector_ideal_reg(int len); + static const uint vector_shift_count_ideal_reg(int len); +diff --git a/src/hotspot/share/opto/node.cpp b/src/hotspot/share/opto/node.cpp +index 02bb6bb16..99d51ba05 100644 +--- a/src/hotspot/share/opto/node.cpp ++++ b/src/hotspot/share/opto/node.cpp +@@ -2359,6 +2359,27 @@ Node* Node::find_similar(int opc) { + return NULL; + } + ++//--------------------------is_similar----------------------------------- ++// True if a node has the same opcode and inputs as "this". ++bool Node::is_similar(Node* node) { ++ if (this == node) { ++ return true; ++ } else { ++ if (is_Opcode_equal(node) && (req() == node->req())) { ++ for (uint i = 0; i < node->req(); i++) { ++ if (in(i) != node->in(i)) { ++ return false; ++ } ++ } ++ return true; ++ } ++ } ++ return false; ++} ++ ++bool Node::is_Opcode_equal(Node* node) { ++ return Opcode() == node->Opcode(); ++} + + //--------------------------unique_ctrl_out------------------------------ + // Return the unique control out if only one. Null if none or more than one. +diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp +index 0c0b9bf69..e24456d85 100644 +--- a/src/hotspot/share/opto/node.hpp ++++ b/src/hotspot/share/opto/node.hpp +@@ -1030,6 +1030,11 @@ public: + // be found; Otherwise return NULL; + Node* find_similar(int opc); + ++ // True if a node has the same opcode and inputs as "this". ++ bool is_similar(Node* node); ++ ++ virtual bool is_Opcode_equal(Node* node); ++ + // Return the unique control out if only one. Null if none or more than one. 
+ Node* unique_ctrl_out() const; + +diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp +index e31e8d847..aa0483c73 100644 +--- a/src/hotspot/share/opto/opcodes.cpp ++++ b/src/hotspot/share/opto/opcodes.cpp +@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { + "RegF", + "RegD", + "RegL", +- "RegFlags", ++ "VecA", + "VecS", + "VecD", + "VecX", + "VecY", + "VecZ", ++ "RegVMask", ++ "RegFlags", + "_last_machine_leaf", + #include "classes.hpp" + "_last_class_name", +diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp +index ae3d61ce0..0a77c3732 100644 +--- a/src/hotspot/share/opto/opcodes.hpp ++++ b/src/hotspot/share/opto/opcodes.hpp +@@ -37,11 +37,13 @@ enum Opcodes { + macro(RegF) // Machine float register + macro(RegD) // Machine double register + macro(RegL) // Machine long register ++ macro(VecA) // Machine vectora register + macro(VecS) // Machine vectors register + macro(VecD) // Machine vectord register + macro(VecX) // Machine vectorx register + macro(VecY) // Machine vectory register + macro(VecZ) // Machine vectorz register ++ macro(RegVMask) // Vector mask/predicate register + macro(RegFlags) // Machine flags register + _last_machine_leaf, // Split between regular opcodes and machine + #include "classes.hpp" +diff --git a/src/hotspot/share/opto/phase.cpp b/src/hotspot/share/opto/phase.cpp +index 397a53713..89c7fc7c8 100644 +--- a/src/hotspot/share/opto/phase.cpp ++++ b/src/hotspot/share/opto/phase.cpp +@@ -113,6 +113,7 @@ void Phase::print_timers() { + tty->print_cr (" Regalloc Split: %7.3f s", timers[_t_regAllocSplit].seconds()); + tty->print_cr (" Postalloc Copy Rem: %7.3f s", timers[_t_postAllocCopyRemoval].seconds()); + tty->print_cr (" Merge multidefs: %7.3f s", timers[_t_mergeMultidefs].seconds()); ++ tty->print_cr (" Merge debugdefs: %7.3f s", timers[_t_mergeDebugdefs].seconds()); + tty->print_cr (" Fixup Spills: %7.3f s", timers[_t_fixupSpills].seconds()); + tty->print_cr (" Compact: %7.3f s", timers[_t_chaitinCompact].seconds()); + tty->print_cr (" Coalesce 1: %7.3f s", timers[_t_chaitinCoalesce1].seconds()); +@@ -130,6 +131,7 @@ void Phase::print_timers() { + timers[_t_regAllocSplit].seconds() + + timers[_t_postAllocCopyRemoval].seconds() + + timers[_t_mergeMultidefs].seconds() + ++ timers[_t_mergeDebugdefs].seconds() + + timers[_t_fixupSpills].seconds() + + timers[_t_chaitinCompact].seconds() + + timers[_t_chaitinCoalesce1].seconds() + +diff --git a/src/hotspot/share/opto/phase.hpp b/src/hotspot/share/opto/phase.hpp +index 4b0c53ffc..b3302ec86 100644 +--- a/src/hotspot/share/opto/phase.hpp ++++ b/src/hotspot/share/opto/phase.hpp +@@ -91,6 +91,7 @@ public: + _t_regAllocSplit, + _t_postAllocCopyRemoval, + _t_mergeMultidefs, ++ _t_mergeDebugdefs, + _t_fixupSpills, + _t_chaitinCompact, + _t_chaitinCoalesce1, +diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp +index 46766b604..3f608bb40 100644 +--- a/src/hotspot/share/opto/postaloc.cpp ++++ b/src/hotspot/share/opto/postaloc.cpp +@@ -27,6 +27,7 @@ + #include "memory/resourceArea.hpp" + #include "opto/chaitin.hpp" + #include "opto/machnode.hpp" ++#include "opto/addnode.hpp" + + // See if this register (or pairs, or vector) already contains the value. + static bool register_contains_value(Node* val, OptoReg::Name reg, int n_regs, +@@ -266,9 +267,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v + Node *val = skip_copies(n->in(k)); + if (val == x) return blk_adjust; // No progress? 
+ +- int n_regs = RegMask::num_registers(val->ideal_reg()); + uint val_idx = _lrg_map.live_range_id(val); + OptoReg::Name val_reg = lrgs(val_idx).reg(); ++ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); + + // See if it happens to already be in the correct register! + // (either Phi's direct register, or the common case of the name +@@ -305,8 +306,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v + } + + Node *vv = value[reg]; ++ // For scalable register, number of registers may be inconsistent between ++ // "val_reg" and "reg". For example, when "val" resides in register ++ // but "reg" is located in stack. ++ if (lrgs(val_idx).is_scalable()) { ++ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); ++ if (OptoReg::is_stack(reg)) { ++ n_regs = lrgs(val_idx).scalable_reg_slots(); ++ } else { ++ n_regs = RegMask::SlotsPerVecA; ++ } ++ } + if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set +- uint last = (n_regs-1); // Looking for the last part of a set ++ uint last; ++ if (lrgs(val_idx).is_scalable()) { ++ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); ++ // For scalable vector register, regmask is always SlotsPerVecA bits aligned ++ last = RegMask::SlotsPerVecA - 1; ++ } else { ++ last = (n_regs-1); // Looking for the last part of a set ++ } + if ((reg&last) != last) continue; // Wrong part of a set + if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value + } +@@ -410,6 +429,28 @@ void PhaseChaitin::merge_multidefs() { + } + } + ++void PhaseChaitin::merge_debugdefs() { ++ Compile::TracePhase tp("merge_Debugdefs", &timers[_t_mergeDebugdefs]); ++ ++ ResourceMark rm; ++ for (uint i = 0; i < _cfg.number_of_blocks(); i++) { ++ Block* block = _cfg.get_block(i); ++ for (int j = 0; j < (int) block->number_of_nodes(); j++) { ++ Node* base = block->get_node(j); ++ if (base && base->is_Mach() && base->outcnt() == 1) { ++ Node* addp = base->unique_out(); ++ if (addp && addp->is_Mach() && addp->as_Mach()->ideal_Opcode() == Op_AddP) { ++ Node* derived = addp->in(AddPNode::Address); ++ if (base == addp->in(AddPNode::Base) && base->is_similar(derived)) { ++ base->subsume_by(derived, Compile::current()); ++ block->remove_node(j--); ++ } ++ } ++ } ++ } ++ } ++} ++ + int PhaseChaitin::possibly_merge_multidef(Node *n, uint k, Block *block, RegToDefUseMap& reg2defuse) { + int blk_adjust = 0; + +@@ -591,7 +632,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + uint k; + Node *phi = block->get_node(j); + uint pidx = _lrg_map.live_range_id(phi); +- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); ++ OptoReg::Name preg = lrgs(pidx).reg(); + + // Remove copies remaining on edges. Check for junk phi. 
+ Node *u = NULL; +@@ -619,7 +660,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + if( pidx ) { + value.map(preg,phi); + regnd.map(preg,phi); +- int n_regs = RegMask::num_registers(phi->ideal_reg()); ++ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name preg_lo = OptoReg::add(preg,-l); + value.map(preg_lo,phi); +@@ -663,7 +704,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + regnd.map(ureg, def); + // Record other half of doubles + uint def_ideal_reg = def->ideal_reg(); +- int n_regs = RegMask::num_registers(def_ideal_reg); ++ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); + if (!value[ureg_lo] && +@@ -707,7 +748,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + } + + uint n_ideal_reg = n->ideal_reg(); +- int n_regs = RegMask::num_registers(n_ideal_reg); ++ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); + if (n_regs == 1) { + // If Node 'n' does not change the value mapped by the register, + // then 'n' is a useless copy. Do not update the register->node +diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp +index 2e04c42eb..34a701e84 100644 +--- a/src/hotspot/share/opto/regmask.cpp ++++ b/src/hotspot/share/opto/regmask.cpp +@@ -24,6 +24,7 @@ + + #include "precompiled.hpp" + #include "opto/ad.hpp" ++#include "opto/chaitin.hpp" + #include "opto/compile.hpp" + #include "opto/matcher.hpp" + #include "opto/node.hpp" +@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( + + //============================================================================= + bool RegMask::is_vector(uint ireg) { +- return (ireg == Op_VecS || ireg == Op_VecD || ++ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || + ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); + } + + int RegMask::num_registers(uint ireg) { + switch(ireg) { + case Op_VecZ: +- return 16; ++ return SlotsPerVecZ; + case Op_VecY: +- return 8; ++ return SlotsPerVecY; + case Op_VecX: +- return 4; ++ return SlotsPerVecX; + case Op_VecD: ++ return SlotsPerVecD; + case Op_RegD: + case Op_RegL: + #ifdef _LP64 + case Op_RegP: + #endif + return 2; ++ case Op_VecA: ++ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); ++ return SlotsPerVecA; + } + // Op_VecS and the rest ideal registers. + return 1; + } + ++int RegMask::num_registers(uint ireg, LRG &lrg) { ++ int n_regs = num_registers(ireg); ++ ++ // assigned is OptoReg which is selected by register allocator ++ OptoReg::Name assigned = lrg.reg(); ++ assert(OptoReg::is_valid(assigned), "should be valid opto register"); ++ ++ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { ++ n_regs = lrg.scalable_reg_slots(); ++ } ++ return n_regs; ++} ++ + //------------------------------find_first_pair-------------------------------- + // Find the lowest-numbered register pair in the mask. Return the + // HIGHEST register number in the pair, or BAD if no pairs. +@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { + return true; + } + ++// Check that whether given reg number with size is valid ++// for current regmask, where reg is the highest number. 
++bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { ++ for (int i = 0; i < size; i++) { ++ if (!Member(reg - i)) { ++ return false; ++ } ++ } ++ return true; ++} ++ + // only indicies of power 2 are accessed, so index 3 is only filled in for storage. + static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; + //------------------------------find_first_set--------------------------------- + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Works also for size 1. +-OptoReg::Name RegMask::find_first_set(const int size) const { +- verify_sets(size); ++OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { ++ if (lrg.is_scalable()) { ++ // For scalable vector register, regmask is SlotsPerVecA bits aligned. ++ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); ++ } else { ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); ++ } + for (int i = 0; i < RM_SIZE; i++) { + if (_A[i]) { // Found some bits + int bit = _A[i] & -_A[i]; // Extract low bit +diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp +index c64d08795..2688275be 100644 +--- a/src/hotspot/share/opto/regmask.hpp ++++ b/src/hotspot/share/opto/regmask.hpp +@@ -28,6 +28,8 @@ + #include "code/vmreg.hpp" + #include "opto/optoreg.hpp" + ++class LRG; ++ + // Some fun naming (textual) substitutions: + // + // RegMask::get_low_elem() ==> RegMask::find_first_elem() +@@ -95,6 +97,7 @@ public: + // requirement is internal to the allocator, and independent of any + // particular platform. + enum { SlotsPerLong = 2, ++ SlotsPerVecA = RISCV_ONLY(4) NOT_RISCV(8), + SlotsPerVecS = 1, + SlotsPerVecD = 2, + SlotsPerVecX = 4, +@@ -204,10 +207,14 @@ public: + return false; + } + ++ // Check that whether given reg number with size is valid ++ // for current regmask, where reg is the highest number. ++ bool is_valid_reg(OptoReg::Name reg, const int size) const; ++ + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Assert that the mask contains only bit sets. +- OptoReg::Name find_first_set(const int size) const; ++ OptoReg::Name find_first_set(LRG &lrg, const int size) const; + + // Clear out partial bits; leave only aligned adjacent bit sets of size. + void clear_to_sets(const int size); +@@ -226,6 +233,7 @@ public: + + static bool is_vector(uint ireg); + static int num_registers(uint ireg); ++ static int num_registers(uint ireg, LRG &lrg); + + // Fast overlap test. Non-zero if any registers in common. + int overlap( const RegMask &rm ) const { +diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp +index fed52e488..ee583236f 100644 +--- a/src/hotspot/share/opto/superword.cpp ++++ b/src/hotspot/share/opto/superword.cpp +@@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz + //------------------------------transform_loop--------------------------- + void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { + assert(UseSuperWord, "should be"); +- // Do vectors exist on this architecture? +- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; ++ // SuperWord only works with power of two vector sizes. 
++ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); ++ if (vector_width < 2 || !is_power_of_2(vector_width)) { ++ return; ++ } + + assert(lpt->_head->is_CountedLoop(), "must be"); + CountedLoopNode *cl = lpt->_head->as_CountedLoop(); +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index 7d767c47c..c9948df5f 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other ++ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX +@@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { + // get_zero_type() should not happen for T_CONFLICT + _zero_type[T_CONFLICT]= NULL; + ++ if (Matcher::supports_scalable_vector()) { ++ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); ++ } ++ + // Vector predefined types, it needs initialized _const_basic_type[]. + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE,4); +@@ -671,6 +676,7 @@ void Type::Initialize_shared(Compile* current) { + if (Matcher::vector_size_supported(T_FLOAT,16)) { + TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); + } ++ mreg2type[Op_VecA] = TypeVect::VECTA; + mreg2type[Op_VecS] = TypeVect::VECTS; + mreg2type[Op_VecD] = TypeVect::VECTD; + mreg2type[Op_VecX] = TypeVect::VECTX; +@@ -990,6 +996,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { + + Bad, // Tuple - handled in v-call + Bad, // Array - handled in v-call ++ Bad, // VectorA - handled in v-call + Bad, // VectorS - handled in v-call + Bad, // VectorD - handled in v-call + Bad, // VectorX - handled in v-call +@@ -2329,6 +2336,7 @@ bool TypeAry::ary_must_be_exact() const { + + //==============================TypeVect======================================= + // Convenience common pre-built types. 
++const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic + const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors + const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors + const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors +@@ -2339,10 +2347,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors + const TypeVect* TypeVect::make(const Type *elem, uint length) { + BasicType elem_bt = elem->array_element_basic_type(); + assert(is_java_primitive(elem_bt), "only primitive types in vector"); +- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); + assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); + int size = length * type2aelembytes(elem_bt); + switch (Matcher::vector_ideal_reg(size)) { ++ case Op_VecA: ++ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); + case Op_VecS: + return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); + case Op_RegL: +@@ -2375,6 +2384,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { + default: // All else is a mistake + typerr(t); + ++ case VectorA: + case VectorS: + case VectorD: + case VectorX: +@@ -2429,6 +2439,8 @@ bool TypeVect::empty(void) const { + #ifndef PRODUCT + void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { + switch (base()) { ++ case VectorA: ++ st->print("vectora["); break; + case VectorS: + st->print("vectors["); break; + case VectorD: +diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp +index 27d042d94..82ee2dfcb 100644 +--- a/src/hotspot/share/opto/type.hpp ++++ b/src/hotspot/share/opto/type.hpp +@@ -53,6 +53,7 @@ class TypeNarrowKlass; + class TypeAry; + class TypeTuple; + class TypeVect; ++class TypeVectA; + class TypeVectS; + class TypeVectD; + class TypeVectX; +@@ -87,6 +88,7 @@ public: + + Tuple, // Method signature or object layout + Array, // Array types ++ VectorA, // (Scalable) Vector types for vector length agnostic + VectorS, // 32bit Vector types + VectorD, // 64bit Vector types + VectorX, // 128bit Vector types +@@ -769,6 +771,7 @@ public: + virtual const Type *xmeet( const Type *t) const; + virtual const Type *xdual() const; // Compute dual right now. + ++ static const TypeVect *VECTA; + static const TypeVect *VECTS; + static const TypeVect *VECTD; + static const TypeVect *VECTX; +@@ -780,6 +783,11 @@ public: + #endif + }; + ++class TypeVectA : public TypeVect { ++ friend class TypeVect; ++ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} ++}; ++ + class TypeVectS : public TypeVect { + friend class TypeVect; + TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} +@@ -1630,12 +1638,12 @@ inline const TypeAry *Type::is_ary() const { + } + + inline const TypeVect *Type::is_vect() const { +- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); ++ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); + return (TypeVect*)this; + } + + inline const TypeVect *Type::isa_vect() const { +- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; ++ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; + } + + inline const TypePtr *Type::is_ptr() const { +diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp +index de22591ba..b82d631f4 100644 +--- a/src/hotspot/share/opto/vectornode.cpp ++++ b/src/hotspot/share/opto/vectornode.cpp +@@ -236,7 +236,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { + (vlen > 1) && is_power_of_2(vlen) && + Matcher::vector_size_supported(bt, vlen)) { + int vopc = VectorNode::opcode(opc, bt); +- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); ++ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); + } + return false; + } +@@ -655,7 +655,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { + (vlen > 1) && is_power_of_2(vlen) && + Matcher::vector_size_supported(bt, vlen)) { + int vopc = ReductionNode::opcode(opc, bt); +- return vopc != opc && Matcher::match_rule_supported(vopc); ++ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); + } + return false; + } +diff --git a/src/hotspot/share/runtime/abstract_vm_version.cpp b/src/hotspot/share/runtime/abstract_vm_version.cpp +index c46247f2b..ee769634f 100644 +--- a/src/hotspot/share/runtime/abstract_vm_version.cpp ++++ b/src/hotspot/share/runtime/abstract_vm_version.cpp +@@ -98,8 +98,13 @@ bool Abstract_VM_Version::_parallel_worker_threads_initialized = false; + #ifdef ZERO + #define VMTYPE "Zero" + #else // ZERO +- #define VMTYPE COMPILER1_PRESENT("Client") \ +- COMPILER2_PRESENT("Server") ++ #ifdef COMPILER2 ++ #define VMTYPE "Server" ++ #elif defined(COMPILER1) ++ #define VMTYPE "Client" ++ #else ++ #define VMTYPE "Core" ++ #endif // COMPILER2 + #endif // ZERO + #endif // TIERED + #endif +@@ -196,7 +201,8 @@ const char* Abstract_VM_Version::jre_release_version() { + IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ + S390_ONLY("s390") \ +- SPARC_ONLY("sparc") ++ SPARC_ONLY("sparc") \ ++ RISCV64_ONLY("riscv64") + #endif // !ZERO + #endif // !CPU + +diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp +index 0a9c45f85..a96c2dd81 100644 +--- a/src/hotspot/share/runtime/thread.hpp ++++ b/src/hotspot/share/runtime/thread.hpp +@@ -1234,7 +1234,7 @@ class JavaThread: public Thread { + address last_Java_pc(void) { return _anchor.last_Java_pc(); } + + // Safepoint support +-#if !(defined(PPC64) || defined(AARCH64)) ++#if !(defined(PPC64) || defined(AARCH64) || defined(RISCV64)) + JavaThreadState thread_state() const { return _thread_state; } + void set_thread_state(JavaThreadState s) { + assert(current_or_null() == NULL || current_or_null() == this, +diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp +index dee8534f7..aa71d7655 100644 +--- a/src/hotspot/share/runtime/thread.inline.hpp ++++ b/src/hotspot/share/runtime/thread.inline.hpp +@@ -142,7 +142,7 @@ inline void JavaThread::set_pending_async_exception(oop e) { + set_has_async_exception(); + } + +-#if defined(PPC64) || defined (AARCH64) ++#if defined(PPC64) || defined(AARCH64) || defined(RISCV64) + inline JavaThreadState JavaThread::thread_state() const { + return (JavaThreadState) OrderAccess::load_acquire((volatile jint*)&_thread_state); + } +diff --git a/src/hotspot/share/utilities/debug.cpp b/src/hotspot/share/utilities/debug.cpp +index 0b898dcc3..7f76486ae 100644 +--- a/src/hotspot/share/utilities/debug.cpp ++++ b/src/hotspot/share/utilities/debug.cpp +@@ -632,6 +632,7 @@ void help() { + tty->print_cr(" pns($sp, $rbp, $pc) 
on Linux/amd64 and Solaris/amd64 or"); + tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); + tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); ++ tty->print_cr(" pns($sp, $fp, $pc) on Linux/RISCV64 or"); + tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); + tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); + tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index cf8025386..e8ab3097a 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -597,6 +597,32 @@ + + #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) + ++#if defined(RISCV32) || defined(RISCV64) ++#define RISCV ++#define RISCV_ONLY(code) code ++#define NOT_RISCV(code) ++#else ++#undef RISCV ++#define RISCV_ONLY(code) ++#define NOT_RISCV(code) code ++#endif ++ ++#ifdef RISCV32 ++#define RISCV32_ONLY(code) code ++#define NOT_RISCV32(code) ++#else ++#define RISCV32_ONLY(code) ++#define NOT_RISCV32(code) code ++#endif ++ ++#ifdef RISCV64 ++#define RISCV64_ONLY(code) code ++#define NOT_RISCV64(code) ++#else ++#define RISCV64_ONLY(code) ++#define NOT_RISCV64(code) code ++#endif ++ + #ifdef VM_LITTLE_ENDIAN + #define LITTLE_ENDIAN_ONLY(code) code + #define BIG_ENDIAN_ONLY(code) +diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java +index 063a5ef3a..50e9cdb57 100644 +--- a/src/java.base/share/classes/java/lang/StringLatin1.java ++++ b/src/java.base/share/classes/java/lang/StringLatin1.java +@@ -209,6 +209,11 @@ final class StringLatin1 { + // Note: fromIndex might be near -1>>>1. + return -1; + } ++ return indexOfChar(value, ch, fromIndex, max); ++ } ++ ++ @HotSpotIntrinsicCandidate ++ private static int indexOfChar(byte[] value, int ch, int fromIndex, int max) { + byte c = (byte)ch; + for (int i = fromIndex; i < max; i++) { + if (value[i] == c) { +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c..55a7b96f7 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -58,6 +58,10 @@ + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef riscv64 ++#include "sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(riscv64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -422,6 +426,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #ifdef aarch64 + #define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG + #endif ++#ifdef riscv64 ++#define NPRGREG sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_NPRGREG ++#endif + #if defined(sparc) || defined(sparcv9) + #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG + #endif +@@ -534,6 +541,46 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + } + #endif /* aarch64 */ + ++#if defined(riscv64) ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_riscv64_RISCV64ThreadContext_##reg ++ ++ { ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(LR)] = gregs.ra; ++ regs[REG_INDEX(SP)] = gregs.sp; ++ regs[REG_INDEX(R3)] = gregs.gp; ++ regs[REG_INDEX(R4)] = gregs.tp; ++ regs[REG_INDEX(R5)] = gregs.t0; ++ regs[REG_INDEX(R6)] = gregs.t1; ++ regs[REG_INDEX(R7)] = gregs.t2; ++ regs[REG_INDEX(R8)] = gregs.s0; ++ regs[REG_INDEX(R9)] = gregs.s1; ++ regs[REG_INDEX(R10)] = gregs.a0; ++ regs[REG_INDEX(R11)] = gregs.a1; ++ regs[REG_INDEX(R12)] = gregs.a2; ++ regs[REG_INDEX(R13)] = gregs.a3; ++ regs[REG_INDEX(R14)] = gregs.a4; ++ regs[REG_INDEX(R15)] = gregs.a5; ++ regs[REG_INDEX(R16)] = gregs.a6; ++ regs[REG_INDEX(R17)] = gregs.a7; ++ regs[REG_INDEX(R18)] = gregs.s2; ++ regs[REG_INDEX(R19)] = gregs.s3; ++ regs[REG_INDEX(R20)] = gregs.s4; ++ regs[REG_INDEX(R21)] = gregs.s5; ++ regs[REG_INDEX(R22)] = gregs.s6; ++ regs[REG_INDEX(R23)] = gregs.s7; ++ regs[REG_INDEX(R24)] = gregs.s8; ++ regs[REG_INDEX(R25)] = gregs.s9; ++ regs[REG_INDEX(R26)] = gregs.s10; ++ regs[REG_INDEX(R27)] = gregs.s11; ++ regs[REG_INDEX(R28)] = gregs.t3; ++ regs[REG_INDEX(R29)] = gregs.t4; ++ regs[REG_INDEX(R30)] = gregs.t5; ++ regs[REG_INDEX(R31)] = gregs.t6; ++ } ++#endif /* riscv64 */ ++ + #if defined(ppc64) || defined(ppc64le) + #define REG_INDEX(reg) sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_##reg + +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e02..9d7fda8a6 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -43,6 +43,8 @@ + #elif defined(arm) + #include <asm/ptrace.h> + #define user_regs_struct pt_regs ++#elif defined(riscv64) ++#include <asm/ptrace.h> + #endif + + // This C bool type must be int for compatibility with Linux calls and +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index de5254d85..12eafc455 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -134,6 +134,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #define ptrace_getregs(request, pid, addr, data) ptrace(request, pid, data, addr) + #endif + ++// riscv kernel didn't implement compat_arch_ptrace function that will handle PT_GETREGS case ++// like other
platforms, so call ptrace with PTRACE_GETREGSET here. ++#ifndef riscv64 + #if defined(_LP64) && defined(PTRACE_GETREGS64) + #define PTRACE_GETREGS_REQ PTRACE_GETREGS64 + #elif defined(PTRACE_GETREGS) +@@ -141,6 +144,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #elif defined(PT_GETREGS) + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif ++#endif + + #ifdef PTRACE_GETREGS_REQ + if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c..82c083055 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -1,6 +1,7 @@ + /* + * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -36,6 +37,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; + import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; + import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; ++import sun.jvm.hotspot.debugger.MachineDescriptionRISCV64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +@@ -592,6 +594,8 @@ public class HotSpotAgent { + machDesc = new MachineDescriptionPPC64(); + } else if (cpu.equals("aarch64")) { + machDesc = new MachineDescriptionAArch64(); ++ } else if (cpu.equals("riscv64")) { ++ machDesc = new MachineDescriptionRISCV64(); + } else if (cpu.equals("sparc")) { + if (LinuxDebuggerLocal.getAddressSize()==8) { + machDesc = new MachineDescriptionSPARC64Bit(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +new file mode 100644 +index 000000000..4221937f1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionRISCV64.java +@@ -0,0 +1,40 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionRISCV64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb71..acd5844ca 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -33,6 +33,7 @@ import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; ++import sun.jvm.hotspot.debugger.riscv64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; +@@ -40,6 +41,7 @@ import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.riscv64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -116,7 +118,14 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); +- } else { ++ } else if (cpu.equals("riscv64")) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxRISCV64CFrame(dbg, fp, pc); ++ } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); + return context.getTopFrame(dbg); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +new file mode 100644 +index 000000000..eaef586b4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64CFrame.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++ ++public final class LinuxRISCV64CFrame extends BasicCFrame { ++ private static final int C_FRAME_LINK_OFFSET = -2; ++ private static final int C_FRAME_RETURN_ADDR_OFFSET = -1; ++ ++ public LinuxRISCV64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ RISCV64ThreadContext context = (RISCV64ThreadContext) thread.getContext(); ++ Address rsp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(rsp)) { ++ return null; ++ } ++ ++ // Check alignment of fp ++ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(C_FRAME_LINK_OFFSET * ADDRESS_SIZE); ++ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(C_FRAME_RETURN_ADDR_OFFSET * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxRISCV64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ // package/class internals only ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address sp; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +new file mode 100644 +index 000000000..4789e664c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/riscv64/LinuxRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxRISCV64ThreadContext extends RISCV64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxRISCV64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index 74e957d94..1f44d75ee 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -32,12 +32,14 @@ import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; ++import sun.jvm.hotspot.debugger.proc.riscv64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; ++import sun.jvm.hotspot.debugger.riscv64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.utilities.*; +@@ -94,6 +96,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; + fpRegIndex = AARCH64ThreadContext.FP; ++ } else if (cpu.equals("riscv64")) { ++ threadFactory = new ProcRISCV64ThreadFactory(this); ++ pcRegIndex = RISCV64ThreadContext.PC; ++ fpRegIndex = RISCV64ThreadContext.FP; + } else if (cpu.equals("ppc64")) { + threadFactory = new ProcPPC64ThreadFactory(this); + pcRegIndex = PPC64ThreadContext.PC; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +new file mode 100644 +index 000000000..c1cf1fb0f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64Thread.java +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcRISCV64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcRISCV64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcRISCV64ThreadContext context = new ProcRISCV64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size mismatch"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcRISCV64Thread)) { ++ return false; ++ } ++ ++ return (((ProcRISCV64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +new file mode 100644 +index 000000000..498fa0dc6 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadContext extends RISCV64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +new file mode 100644 +index 000000000..81afd8fdc +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/riscv64/ProcRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcRISCV64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcRISCV64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +new file mode 100644 +index 000000000..ab92e3e74 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64Thread.java +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteRISCV64Thread extends RemoteThread { ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteRISCV64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteRISCV64ThreadContext context = new RemoteRISCV64ThreadContext(debugger); ++ long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == RISCV64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +new file mode 100644 +index 000000000..1e8cd19b2 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadContext.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadContext extends RISCV64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +new file mode 100644 +index 000000000..eecb6e029 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/riscv64/RemoteRISCV64ThreadFactory.java +@@ -0,0 +1,46 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteRISCV64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteRISCV64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteRISCV64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteRISCV64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java +new file mode 100644 +index 000000000..426ff0580 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/riscv64/RISCV64ThreadContext.java +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.riscv64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on riscv64 platforms; only a sub-portion ++ * of the context is guaranteed to be present on all operating ++ * systems. */ ++ ++public abstract class RISCV64ThreadContext implements ThreadContext { ++ // Taken from /usr/include/asm/sigcontext.h on Linux/RISCV64. 
++ ++ // /* ++ // * Signal context structure - contains all info to do with the state ++ // * before the signal handler was invoked. ++ // */ ++ // struct sigcontext { ++ // struct user_regs_struct sc_regs; ++ // union __riscv_fp_state sc_fpregs; ++ // }; ++ // ++ // struct user_regs_struct { ++ // unsigned long pc; ++ // unsigned long ra; ++ // unsigned long sp; ++ // unsigned long gp; ++ // unsigned long tp; ++ // unsigned long t0; ++ // unsigned long t1; ++ // unsigned long t2; ++ // unsigned long s0; ++ // unsigned long s1; ++ // unsigned long a0; ++ // unsigned long a1; ++ // unsigned long a2; ++ // unsigned long a3; ++ // unsigned long a4; ++ // unsigned long a5; ++ // unsigned long a6; ++ // unsigned long a7; ++ // unsigned long s2; ++ // unsigned long s3; ++ // unsigned long s4; ++ // unsigned long s5; ++ // unsigned long s6; ++ // unsigned long s7; ++ // unsigned long s8; ++ // unsigned long s9; ++ // unsigned long s10; ++ // unsigned long s11; ++ // unsigned long t3; ++ // unsigned long t4; ++ // unsigned long t5; ++ // unsigned long t6; ++ // }; ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work) ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. ++ @Native ++ public static final int R0 = 0; ++ public static final int R1 = 1; ++ public static final int R2 = 2; ++ public static final int R3 = 3; ++ public static final int R4 = 4; ++ public static final int R5 = 5; ++ public static final int R6 = 6; ++ public static final int R7 = 7; ++ public static final int R8 = 8; ++ public static final int R9 = 9; ++ public static final int R10 = 10; ++ public static final int R11 = 11; ++ public static final int R12 = 12; ++ public static final int R13 = 13; ++ public static final int R14 = 14; ++ public static final int R15 = 15; ++ public static final int R16 = 16; ++ public static final int R17 = 17; ++ public static final int R18 = 18; ++ public static final int R19 = 19; ++ public static final int R20 = 20; ++ public static final int R21 = 21; ++ public static final int R22 = 22; ++ public static final int R23 = 23; ++ public static final int R24 = 24; ++ public static final int R25 = 25; ++ public static final int R26 = 26; ++ public static final int R27 = 27; ++ public static final int R28 = 28; ++ public static final int R29 = 29; ++ public static final int R30 = 30; ++ public static final int R31 = 31; ++ ++ public static final int NPRGREG = 32; ++ ++ public static final int PC = R0; ++ public static final int LR = R1; ++ public static final int SP = R2; ++ public static final int FP = R8; ++ ++ private long[] data; ++ ++ public RISCV64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ switch (index) { ++ case LR: return "lr"; ++ case SP: return "sp"; ++ case PC: return "pc"; ++ default: ++ return "r" + index; ++ } ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract 
void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785..74bd614d3 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -38,6 +38,7 @@ import sun.jvm.hotspot.runtime.win32_aarch64.Win32AARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_riscv64.LinuxRISCV64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; +@@ -99,6 +100,8 @@ public class Threads { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ } else if (cpu.equals("riscv64")) { ++ access = new LinuxRISCV64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +new file mode 100644 +index 000000000..2df0837b6 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_riscv64/LinuxRISCV64JavaThreadPDAccess.java +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_riscv64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxRISCV64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new RISCV64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new RISCV64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ RISCV64CurrentFrameGuess guesser = new RISCV64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new RISCV64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ RISCV64ThreadContext context = (RISCV64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. 
++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +new file mode 100644 +index 000000000..a3bbf1ad1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64CurrentFrameGuess.java +@@ -0,0 +1,223 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.riscv64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.riscv64.*; ++ ++/**

<P> Should be able to be used on all riscv64 platforms we support ++ (Linux/riscv64) to implement JavaThread's "currentFrameGuess()" ++ functionality. Input is an RISCV64ThreadContext; output is SP, FP, ++ and PC for an RISCV64Frame. Instantiation of the RISCV64Frame is ++ left to the caller, since we may need to subclass RISCV64Frame to ++ support signal handler frames on Unix platforms. </P> ++ ++ <P> Algorithm is to walk up the stack within a given range (say, ++ 512K at most) looking for a plausible PC and SP for a Java frame, ++ also considering those coming in from the context. If we find a PC ++ that belongs to the VM (i.e., in generated code like the ++ interpreter or CodeCache) then we try to find an associated FP. ++ We repeat this until we either find a complete frame or run out of ++ stack to look at. </P>
*/ ++ ++public class RISCV64CurrentFrameGuess { ++ private RISCV64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.riscv64.RISCV64Frame.DEBUG") ++ != null; ++ ++ public RISCV64CurrentFrameGuess(RISCV64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(RISCV64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(RISCV64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(RISCV64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable FP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from SP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new RISCV64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved SP and ++ // FP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. ++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct RISCV64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +new file mode 100644 +index 000000000..c04def5a1 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64Frame.java +@@ -0,0 +1,554 @@ ++/* ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the riscv64 family of CPUs. */ ++ ++public class RISCV64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.RISCV64.RISCV64Frame.DEBUG") != null; ++ } ++ ++ // Java frames ++ private static final int LINK_OFFSET = -2; ++ private static final int RETURN_ADDR_OFFSET = -1; ++ private static final int SENDER_SP_OFFSET = 0; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -3; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_PADDING_OFFSET; ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -10; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(8); ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; ++ INTERPRETER_FRAME_PADDING_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_PADDING_OFFSET - 1; ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = 
INTERPRETER_FRAME_BCX_OFFSET - 1; ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private RISCV64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. ++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ ++ // We cannot assume SP[-1] always contains a valid return PC (e.g. if ++ // the callee is a C/C++ compiled frame). If the PC is not known to ++ // Java then this.pc is null. ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public RISCV64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("RISCV64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ RISCV64Frame frame = new RISCV64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof RISCV64Frame)) { ++ return false; ++ } ++ ++ RISCV64Frame other = (RISCV64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ RISCV64RegisterMap map = (RISCV64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new RISCV64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ RISCV64JavaCallWrapper jcw = (RISCV64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ RISCV64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new RISCV64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // If we are returning to a compiled MethodHandle call site, the ++ // saved_fp will in fact be a saved value of the unextended SP. The ++ // simplest way to tell whether we are returning to such a call site ++ // is as follows: ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original ++ // PC. For MethodHandle call site the unextended_sp is stored in ++ // saved_fp. ++ if (senderNm.isDeoptMhEntry(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ else if (senderNm.isDeoptEntry(getPC())) { ++ } ++ else if (senderNm.isMethodHandleReturn(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(RISCV64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new RISCV64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(RISCV64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated RISCV64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(RETURN_ADDR_OFFSET * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(LINK_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new RISCV64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. ++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. 
++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new RISCV64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +new file mode 100644 +index 000000000..4d79e3ee4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64JavaCallWrapper.java +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public RISCV64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +new file mode 100644 +index 000000000..d7187a5f8 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/riscv64/RISCV64RegisterMap.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.riscv64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class RISCV64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public RISCV64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected RISCV64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ RISCV64RegisterMap retval = new RISCV64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107c..948eabcab 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -54,7 +54,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "riscv64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +diff --git a/src/utils/hsdis/hsdis.c b/src/utils/hsdis/hsdis.c +index d0a6f4ea8..a29c7bf8b 100644 +--- a/src/utils/hsdis/hsdis.c ++++ b/src/utils/hsdis/hsdis.c +@@ -28,9 +28,6 @@ + */ + + #include /* required by bfd.h */ +-#include +-#include +-#include + + #include + #include +@@ -479,6 +476,9 @@ static const char* native_arch_name() { + #endif + #ifdef LIBARCH_s390x + res = "s390:64-bit"; ++#endif ++#ifdef LIBARCH_riscv64 ++ res = "riscv:rv64"; + #endif + if (res == NULL) + res = "architecture not set in Makefile!"; +diff --git a/test/hotspot/jtreg/compiler/c2/TestBit.java b/test/hotspot/jtreg/compiler/c2/TestBit.java +index 7805918c2..a21307083 100644 +--- a/test/hotspot/jtreg/compiler/c2/TestBit.java ++++ b/test/hotspot/jtreg/compiler/c2/TestBit.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -34,7 +35,7 @@ import jdk.test.lib.process.ProcessTools; + * + * @run driver compiler.c2.TestBit + * +- * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" ++ * @requires os.arch=="aarch64" | os.arch=="amd64" | os.arch == "ppc64le" | os.arch == "riscv64" + * @requires vm.debug == true & vm.compiler2.enabled + */ + public class TestBit { +@@ -54,7 +55,8 @@ public class TestBit { + String expectedTestBitInstruction = + "ppc64le".equals(System.getProperty("os.arch")) ? "ANDI" : + "aarch64".equals(System.getProperty("os.arch")) ? "tb" : +- "amd64".equals(System.getProperty("os.arch")) ? "test" : null; ++ "amd64".equals(System.getProperty("os.arch")) ? "test" : ++ "riscv64".equals(System.getProperty("os.arch")) ? 
"andi" : null; + + if (expectedTestBitInstruction != null) { + output.shouldContain(expectedTestBitInstruction); +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +index 558b4218f..9d875e33f 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA1IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; +@@ -54,6 +56,8 @@ public class TestUseSHA1IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +index 3ed72bf0a..a7e277060 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA256IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; +@@ -54,6 +56,8 @@ public class TestUseSHA256IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +index c05cf309d..e714fcc59 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHA512IntrinsicsOptionOnUnsupportedCPU.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -41,6 +42,7 @@ package compiler.intrinsics.sha.cli; + + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU; +@@ -54,6 +56,8 @@ public class TestUseSHA512IntrinsicsOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new UseSHAIntrinsicsSpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +index 58ce5366b..d52d81e26 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/TestUseSHAOptionOnUnsupportedCPU.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -40,6 +41,7 @@ package compiler.intrinsics.sha.cli; + + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForOtherCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedAArch64CPU; ++import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedRISCV64CPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedSparcCPU; + import compiler.intrinsics.sha.cli.testcases.GenericTestCaseForUnsupportedX86CPU; + import compiler.intrinsics.sha.cli.testcases.UseSHASpecificTestCaseForUnsupportedCPU; +@@ -53,6 +55,8 @@ public class TestUseSHAOptionOnUnsupportedCPU { + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForUnsupportedAArch64CPU( + SHAOptionsBase.USE_SHA_OPTION), ++ new GenericTestCaseForUnsupportedRISCV64CPU( ++ SHAOptionsBase.USE_SHA_OPTION), + new UseSHASpecificTestCaseForUnsupportedCPU( + SHAOptionsBase.USE_SHA_OPTION), + new GenericTestCaseForOtherCPU( +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +index faa9fdbae..50e549069 100644 +--- a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -32,26 +33,27 @@ import jdk.test.lib.cli.predicate.OrPredicate; + + /** + * Generic test case for SHA-related options targeted to any CPU except +- * AArch64, PPC, S390x, SPARC and X86. ++ * AArch64, RISCV64, PPC, S390x, SPARC and X86. + */ + public class GenericTestCaseForOtherCPU extends + SHAOptionsBase.TestCase { + public GenericTestCaseForOtherCPU(String optionName) { +- // Execute the test case on any CPU except AArch64, PPC, S390x, SPARC and X86. ++ // Execute the test case on any CPU except AArch64, RISCV64, PPC, S390x, SPARC and X86. + super(optionName, new NotPredicate( + new OrPredicate(Platform::isAArch64, ++ new OrPredicate(Platform::isRISCV64, + new OrPredicate(Platform::isS390x, + new OrPredicate(Platform::isSparc, + new OrPredicate(Platform::isPPC, + new OrPredicate(Platform::isX64, +- Platform::isX86))))))); ++ Platform::isX86)))))))); + } + + @Override + protected void verifyWarnings() throws Throwable { + String shouldPassMessage = String.format("JVM should start with " + + "option '%s' without any warnings", optionName); +- // Verify that on non-x86, non-SPARC and non-AArch64 CPU usage of ++ // Verify that on non-x86, non-SPARC, non-AArch64 CPU and non-RISCV64 usage of + // SHA-related options will not cause any warnings. 
+ CommandLineOptionTest.verifySameJVMStartup(null, + new String[] { ".*" + optionName + ".*" }, shouldPassMessage, +diff --git a/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +new file mode 100644 +index 000000000..d81b5b53f +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedRISCV64CPU.java +@@ -0,0 +1,102 @@ ++/* ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package compiler.intrinsics.sha.cli.testcases; ++ ++import compiler.intrinsics.sha.cli.SHAOptionsBase; ++import jdk.test.lib.process.ExitCode; ++import jdk.test.lib.Platform; ++import jdk.test.lib.cli.CommandLineOptionTest; ++import jdk.test.lib.cli.predicate.AndPredicate; ++import jdk.test.lib.cli.predicate.NotPredicate; ++ ++/** ++ * Generic test case for SHA-related options targeted to RISCV64 CPUs ++ * which don't support instruction required by the tested option. ++ */ ++public class GenericTestCaseForUnsupportedRISCV64CPU extends ++ SHAOptionsBase.TestCase { ++ public GenericTestCaseForUnsupportedRISCV64CPU(String optionName) { ++ super(optionName, new AndPredicate(Platform::isRISCV64, ++ new NotPredicate(SHAOptionsBase.getPredicateForOption( ++ optionName)))); ++ } ++ ++ @Override ++ protected void verifyWarnings() throws Throwable { ++ String shouldPassMessage = String.format("JVM startup should pass with" ++ + "option '-XX:-%s' without any warnings", optionName); ++ //Verify that option could be disabled without any warnings. ++ CommandLineOptionTest.verifySameJVMStartup(null, new String[] { ++ SHAOptionsBase.getWarningForUnsupportedCPU(optionName) ++ }, shouldPassMessage, shouldPassMessage, ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, false)); ++ ++ shouldPassMessage = String.format("If JVM is started with '-XX:-" ++ + "%s' '-XX:+%s', output should contain warning.", ++ SHAOptionsBase.USE_SHA_OPTION, optionName); ++ ++ // Verify that when the tested option is enabled, then ++ // a warning will occur in VM output if UseSHA is disabled. 
++ if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) { ++ CommandLineOptionTest.verifySameJVMStartup( ++ new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) }, ++ null, ++ shouldPassMessage, ++ shouldPassMessage, ++ ExitCode.OK, ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false), ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ } ++ } ++ ++ @Override ++ protected void verifyOptionValues() throws Throwable { ++ // Verify that option is disabled by default. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be disabled by default", ++ optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS); ++ ++ // Verify that option is disabled even if it was explicitly enabled ++ // using CLI options. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if set to true directly", optionName), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag(optionName, true)); ++ ++ // Verify that option is disabled when +UseSHA was passed to JVM. ++ CommandLineOptionTest.verifyOptionValueForSameVM(optionName, "false", ++ String.format("Option '%s' should be off on unsupported " ++ + "RISCV64CPU even if %s flag set to JVM", ++ optionName, CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)), ++ SHAOptionsBase.UNLOCK_DIAGNOSTIC_VM_OPTIONS, ++ CommandLineOptionTest.prepareBooleanFlag( ++ SHAOptionsBase.USE_SHA_OPTION, true)); ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java +new file mode 100644 +index 000000000..d3aafec8e +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestStringLatin1IndexOfChar.java +@@ -0,0 +1,153 @@ ++/* ++ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @bug 8173585 ++ * @summary Test intrinsification of StringLatin1.indexOf(char). Note that ++ * differing code paths are taken contingent upon the length of the input String. ++ * Hence we must test against differing string lengths in order to validate ++ * correct functionality. We also ensure the strings are long enough to trigger ++ * the looping conditions of the individual code paths. 
++ * ++ * Run with varing levels of AVX and SSE support, also without the intrinsic at all ++ * ++ * @library /compiler/patches /test/lib ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_indexOfL_char compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseSSE=0 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=1 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=2 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ * @run main/othervm -Xbatch -XX:Tier4InvocationThreshold=200 -XX:CompileThreshold=100 -XX:+IgnoreUnrecognizedVMOptions -XX:UseAVX=3 compiler.intrinsics.string.TestStringLatin1IndexOfChar ++ */ ++ ++package compiler.intrinsics.string; ++ ++import jdk.test.lib.Asserts; ++ ++public class TestStringLatin1IndexOfChar{ ++ private final static int MAX_LENGTH = 2048;//future proof for AVX-512 instructions ++ ++ public static void main(String[] args) throws Exception { ++ for (int i = 0; i < 1_000; ++i) {//repeat such that we enter into C2 code... ++ findOneItem(); ++ withOffsetTest(); ++ testEmpty(); ++ } ++ } ++ ++ private static void testEmpty(){ ++ Asserts.assertEQ("".indexOf('a'), -1); ++ } ++ ++ private final static char SEARCH_CHAR = 'z'; ++ private final static char INVERLEAVING_CHAR = 'a'; ++ private final static char MISSING_CHAR = 'd'; ++ ++ private static void findOneItem(){ ++ //test strings of varying length ensuring that for all lengths one instance of the ++ //search char can be found. We check what happens when the search character is in ++ //each position of the search string (including first and last positions) ++ for(int strLength : new int[]{1, 15, 31, 32, 79}){ ++ for(int searchPos = 0; searchPos < strLength; searchPos++){ ++ String totest = makeOneItemStringLatin1(strLength, searchPos); ++ ++ int intri = totest.indexOf(SEARCH_CHAR); ++ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); ++ Asserts.assertEQ(intri, nonintri); ++ } ++ } ++ } ++ ++ private static String makeOneItemStringLatin1(int length, int searchPos){ ++ StringBuilder sb = new StringBuilder(length); ++ ++ for(int n =0; n < length; n++){ ++ sb.append(searchPos==n?SEARCH_CHAR:INVERLEAVING_CHAR); ++ } ++ ++ return sb.toString(); ++ } ++ ++ private static void withOffsetTest(){ ++ //progressivly move through string checking indexes and starting offset correctly processed ++ //string is of form azaza, aazaazaa, aaazaaazaaa, etc ++ //we find n s.t. 
maxlength = (n*3) + 2 ++ int maxaInstances = (MAX_LENGTH-2)/3; ++ ++ for(int aInstances = 5; aInstances < MAX_LENGTH; aInstances++){ ++ String totest = makeWithOffsetStringLatin1(aInstances); ++ ++ int startoffset; ++ { ++ int intri = totest.indexOf(SEARCH_CHAR); ++ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, 0); ++ ++ Asserts.assertEQ(intri, nonintri); ++ startoffset = intri+1; ++ } ++ ++ { ++ int intri = totest.indexOf(SEARCH_CHAR, startoffset); ++ int nonintri = indexOfCharNonIntrinsic(totest, SEARCH_CHAR, startoffset); ++ ++ Asserts.assertEQ(intri, nonintri); ++ startoffset = intri+1; ++ } ++ ++ Asserts.assertEQ(totest.indexOf(SEARCH_CHAR, startoffset), -1);//only two SEARCH_CHAR per string ++ Asserts.assertEQ(totest.indexOf(MISSING_CHAR), -1); ++ } ++ } ++ ++ private static String makeWithOffsetStringLatin1(int aInstances){ ++ StringBuilder sb = new StringBuilder((aInstances*3) + 2); ++ for(int n =0; n < aInstances; n++){ ++ sb.append(INVERLEAVING_CHAR); ++ } ++ ++ sb.append(SEARCH_CHAR); ++ ++ for(int n =0; n < aInstances; n++){ ++ sb.append(INVERLEAVING_CHAR); ++ } ++ ++ sb.append(SEARCH_CHAR); ++ ++ for(int n =0; n < aInstances; n++){ ++ sb.append(INVERLEAVING_CHAR); ++ } ++ return sb.toString(); ++ } ++ ++ private static int indexOfCharNonIntrinsic(String value, int ch, int fromIndex) { ++ //non intrinsic version of indexOfChar ++ byte c = (byte)ch; ++ for (int i = fromIndex; i < value.length(); i++) { ++ if (value.charAt(i) == c) { ++ return i; ++ } ++ } ++ return -1; ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +index 2e3e2717a..8093d6598 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Double.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +index 0e06a9e43..1ff9f36e1 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Float.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +index c3cdbf374..f3531ea74 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ProdRed_Int.java +@@ -25,7 +25,7 @@ + * @test + * @bug 
8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +index d33bd411f..589209447 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions + * -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +index 992fa4b51..907e21371 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Double.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : double abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +index 3e79b3528..c41c0b606 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedAbsNeg_Float.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8138583 + * @summary Add C2 AArch64 Superword support for scalar sum reduction optimizations : float abs & neg test +- * @requires os.arch=="aarch64" ++ * @requires os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +index 6603dd224..b626da40d 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRedSqrt_Double.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8135028 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * 
-XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +index d9a0c9880..92cd84a2f 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Double.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +index 722db95ae..e72345799 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Float.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +index f58f21feb..f4f67cf52 100644 +--- a/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java ++++ b/test/hotspot/jtreg/compiler/loopopts/superword/SumRed_Int.java +@@ -25,7 +25,7 @@ + * @test + * @bug 8074981 + * @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test +- * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" ++ * @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" | os.arch=="riscv64" + * + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:LoopUnrollLimit=250 + * -XX:CompileThresholdScaling=0.1 +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +index acb86812d..c5e38ba72 100644 +--- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167409 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.argumentcorruption.CheckLongArgs + */ + package compiler.runtime.criticalnatives.argumentcorruption; +diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +index eab36f931..4437367b6 100644 +--- 
a/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java ++++ b/test/hotspot/jtreg/compiler/runtime/criticalnatives/lookup/LookUp.java +@@ -24,7 +24,7 @@ + + /* @test + * @bug 8167408 +- * @requires (os.arch != "aarch64") & (os.arch != "arm") ++ * @requires (os.arch != "aarch64") & (os.arch != "arm") & (os.arch != "riscv64") + * @run main/othervm/native -Xcomp -XX:+CriticalJNINatives compiler.runtime.criticalnatives.lookup.LookUp + */ + package compiler.runtime.criticalnatives.lookup; +diff --git a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +index 7774dabcb..284b51019 100644 +--- a/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java ++++ b/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +@@ -61,15 +61,17 @@ public class IntrinsicPredicates { + + public static final BooleanSupplier SHA1_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha1" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha1" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // x86 variants + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), +- new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null)))))); ++ new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null))))))); + + public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -79,10 +81,11 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE + = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), ++ new OrPredicate(new CPUSpecificPredicate("riscv64.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null), + new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), +@@ -92,7 +95,7 @@ public class IntrinsicPredicates { + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), +- new 
CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); ++ new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))))); + + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE + = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, +diff --git a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +index 57256aa5a..16c199e37 100644 +--- a/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java ++++ b/test/hotspot/jtreg/runtime/NMT/CheckForProperDetailStackTrace.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -112,7 +113,7 @@ public class CheckForProperDetailStackTrace { + // It's ok for ARM not to have symbols, because it does not support NMT detail + // when targeting thumb2. It's also ok for Windows not to have symbols, because + // they are only available if the symbols file is included with the build. +- if (Platform.isWindows() || Platform.isARM()) { ++ if (Platform.isWindows() || Platform.isARM() || Platform.isRISCV64()) { + return; // we are done + } + output.reportDiagnosticSummary(); +diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +index 127bb6abc..46be4dc98 100644 +--- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java ++++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -239,7 +240,7 @@ public class ReservedStackTest { + return Platform.isAix() || + (Platform.isLinux() && + (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || +- Platform.isX86())) || ++ Platform.isX86() || Platform.isRISCV64())) || + Platform.isOSX() || + Platform.isSolaris(); + } +diff --git a/test/hotspot/jtreg/test_env.sh b/test/hotspot/jtreg/test_env.sh +index 0c300d4fd..7f3698c47 100644 +--- a/test/hotspot/jtreg/test_env.sh ++++ b/test/hotspot/jtreg/test_env.sh +@@ -185,6 +185,11 @@ if [ $? = 0 ] + then + VM_CPU="arm" + fi ++grep "riscv64" vm_version.out > ${NULL} ++if [ $? = 0 ] ++then ++ VM_CPU="riscv64" ++fi + grep "ppc" vm_version.out > ${NULL} + if [ $? = 0 ] + then +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 77458554b..73e92855d 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -45,7 +46,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isRISCV64", "isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java +index cb3348a0f..bc0d1a743 100644 +--- a/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java ++++ b/test/hotspot/jtreg/vmTestbase/nsk/jvmti/GetThreadInfo/thrinfo001.java +@@ -63,13 +63,13 @@ public class thrinfo001 { + try { + t_a.join(); + } catch (InterruptedException e) {} ++ checkInfo(t_a, t_a.getThreadGroup(), 1); + + thrinfo001b t_b = new thrinfo001b(); + t_b.setPriority(Thread.MIN_PRIORITY); + t_b.setDaemon(true); + checkInfo(t_b, t_b.getThreadGroup(), 2); + t_b.start(); +- checkInfo(t_b, t_b.getThreadGroup(), 2); + try { + t_b.join(); + } catch (InterruptedException e) {} +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1..bb8c79cdd 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -54,8 +55,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "RISCV64"); + } + } + } +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index f4ee0546c..a9cd63db9 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Huawei Technologies Co., Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -202,6 +203,10 @@ public class Platform { + return isArch("arm.*"); + } + ++ public static boolean isRISCV64() { ++ return isArch("riscv64"); ++ } ++ + public static boolean isPPC() { + return isArch("ppc.*"); + } +diff --git a/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java +new file mode 100644 +index 000000000..6852c0540 +--- /dev/null ++++ b/test/micro/org/openjdk/bench/java/lang/StringIndexOfChar.java +@@ -0,0 +1,221 @@ ++/* ++ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package org.openjdk.bench.java.lang; ++ ++import java.util.Random; ++import org.openjdk.jmh.annotations.Benchmark; ++import org.openjdk.jmh.annotations.BenchmarkMode; ++import org.openjdk.jmh.annotations.OutputTimeUnit; ++import org.openjdk.jmh.annotations.Mode; ++import org.openjdk.jmh.annotations.Scope; ++import org.openjdk.jmh.annotations.State; ++ ++import java.util.concurrent.TimeUnit; ++ ++/** ++ * This benchmark can be used to measure performance between StringLatin1 and StringUTF16 in terms of ++ * performance of the indexOf(char) and indexOf(String) methods which are intrinsified. 
++ * On x86 the behaviour of the indexOf method is contingent upon the length of the string ++ */ ++@BenchmarkMode(Mode.AverageTime) ++@OutputTimeUnit(TimeUnit.NANOSECONDS) ++@State(Scope.Thread) ++public class IndexOfBenchmark { ++ private static final int loops = 100000; ++ private static final Random rng = new Random(1999); ++ private static final int pathCnt = 1000; ++ private static final String [] latn1_short = new String[pathCnt]; ++ private static final String [] latn1_sse4 = new String[pathCnt]; ++ private static final String [] latn1_avx2 = new String[pathCnt]; ++ private static final String [] latn1_mixedLength = new String[pathCnt]; ++ private static final String [] utf16_short = new String[pathCnt]; ++ private static final String [] utf16_sse4 = new String[pathCnt]; ++ private static final String [] utf16_avx2 = new String[pathCnt]; ++ private static final String [] utf16_mixedLength = new String[pathCnt]; ++ static { ++ for (int i = 0; i < pathCnt; i++) { ++ latn1_short[i] = makeRndString(false, 15); ++ latn1_sse4[i] = makeRndString(false, 16); ++ latn1_avx2[i] = makeRndString(false, 32); ++ utf16_short[i] = makeRndString(true, 7); ++ utf16_sse4[i] = makeRndString(true, 8); ++ utf16_avx2[i] = makeRndString(true, 16); ++ latn1_mixedLength[i] = makeRndString(false, rng.nextInt(65)); ++ utf16_mixedLength[i] = makeRndString(true, rng.nextInt(65)); ++ } ++ } ++ ++ private static String makeRndString(boolean isUtf16, int length) { ++ StringBuilder sb = new StringBuilder(length); ++ if(length > 0){ ++ sb.append(isUtf16?'☺':'b'); ++ ++ for (int i = 1; i < length-1; i++) { ++ sb.append((char)('b' + rng.nextInt(26))); ++ } ++ ++ sb.append(rng.nextInt(3) >= 1?'a':'b');//66.6% of time 'a' is in string ++ } ++ return sb.toString(); ++ } ++ ++ ++ @Benchmark ++ public static void latin1_mixed_char() { ++ int ret = 0; ++ for (String what : latn1_mixedLength) { ++ ret += what.indexOf('a'); ++ } ++ } ++ ++ @Benchmark ++ public static void utf16_mixed_char() { ++ int ret = 0; ++ for (String what : utf16_mixedLength) { ++ ret += what.indexOf('a'); ++ } ++ } ++ ++ @Benchmark ++ public static void latin1_mixed_String() { ++ int ret = 0; ++ for (String what : latn1_mixedLength) { ++ ret += what.indexOf("a"); ++ } ++ } ++ ++ @Benchmark ++ public static void utf16_mixed_String() { ++ int ret = 0; ++ for (String what : utf16_mixedLength) { ++ ret += what.indexOf("a"); ++ } ++ } ++ ++ ////////// more detailed code path dependent tests ////////// ++ ++ @Benchmark ++ public static void latin1_Short_char() { ++ int ret = 0; ++ for (String what : latn1_short) { ++ ret += what.indexOf('a'); ++ } ++ } ++ ++ @Benchmark ++ public static void latin1_SSE4_char() { ++ int ret = 0; ++ for (String what : latn1_sse4) { ++ ret += what.indexOf('a'); ++ } ++ } ++ ++ @Benchmark ++ public static void latin1_AVX2_char() { ++ int ret = 0; ++ for (String what : latn1_avx2) { ++ ret += what.indexOf('a'); ++ } ++ } ++ ++ @Benchmark ++ public static int utf16_Short_char() { ++ int ret = 0; ++ for (String what : utf16_short) { ++ ret += what.indexOf('a'); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int utf16_SSE4_char() { ++ int ret = 0; ++ for (String what : utf16_sse4) { ++ ret += what.indexOf('a'); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int utf16_AVX2_char() { ++ int ret = 0; ++ for (String what : utf16_avx2) { ++ ret += what.indexOf('a'); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int latin1_Short_String() { ++ int ret = 0; ++ for (String what : latn1_short) { ++ ret += 
what.indexOf("a"); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int latin1_SSE4_String() { ++ int ret = 0; ++ for (String what : latn1_sse4) { ++ ret += what.indexOf("a"); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int latin1_AVX2_String() { ++ int ret = 0; ++ for (String what : latn1_avx2) { ++ ret += what.indexOf("a"); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int utf16_Short_String() { ++ int ret = 0; ++ for (String what : utf16_short) { ++ ret += what.indexOf("a"); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int utf16_SSE4_String() { ++ int ret = 0; ++ for (String what : utf16_sse4) { ++ ret += what.indexOf("a"); ++ } ++ return ret; ++ } ++ ++ @Benchmark ++ public static int utf16_AVX2_String() { ++ int ret = 0; ++ for (String what : utf16_avx2) { ++ ret += what.indexOf("a"); ++ } ++ return ret; ++ } ++} +-- +2.40.0.windows.1 + diff --git a/8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch b/8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch index 8fae6dd6f3299ba7d1b5ac480beb3ee15b165c52..5965562b15d739f491dc23e30ee42d8d6544dd1e 100644 --- a/8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch +++ b/8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch @@ -208,9 +208,9 @@ index f073aa63a..1323cc0ac 100644 // this is called _after_ the global arguments have been parsed jint os::init_2(void) { -@@ -5520,32 +5613,7 @@ jint os::init_2(void) { - Linux::libc_version(), Linux::libpthread_version()); - +@@ -5634,32 +5634,7 @@ jint os::init_2(void) { + #endif + if (UseNUMA) { - if (!Linux::libnuma_init()) { - UseNUMA = false; diff --git a/8214527-AArch64-ZGC-for-Aarch64.patch b/8214527-AArch64-ZGC-for-Aarch64.patch index c4c8a8670e8035217a707940816b27363669895a..65da560d03511476fd149c2d0dfbcd0dc2c9da45 100644 --- a/8214527-AArch64-ZGC-for-Aarch64.patch +++ b/8214527-AArch64-ZGC-for-Aarch64.patch @@ -16,30 +16,10 @@ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch6 index ce103dc1c..53cd702b9 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -957,6 +957,146 @@ reg_class v3_reg( - V3, V3_H +@@ -971,6 +971,126 @@ reg_class v7_reg( + V7, V7_H ); - -+// Class for 128 bit register v4 -+reg_class v4_reg( -+ V4, V4_H -+); -+ -+// Class for 128 bit register v5 -+reg_class v5_reg( -+ V5, V5_H -+); -+ -+// Class for 128 bit register v6 -+reg_class v6_reg( -+ V6, V6_H -+); -+ -+// Class for 128 bit register v7 -+reg_class v7_reg( -+ V7, V7_H -+); -+ + +// Class for 128 bit register v8 +reg_class v8_reg( + V8, V8_H @@ -175,46 +155,10 @@ index ce103dc1c..53cd702b9 100644 //----------SOURCE BLOCK------------------------------------------------------- // This is a block of C++ code which provides values, functions, and -@@ -4761,6 +4906,258 @@ operand vRegD_V3() +@@ -4940,6 +4940,222 @@ operand vRegD_V7() interface(REG_INTER); %} -+operand vRegD_V4() -+%{ -+ constraint(ALLOC_IN_RC(v4_reg)); -+ match(RegD); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vRegD_V5() -+%{ -+ constraint(ALLOC_IN_RC(v5_reg)); -+ match(RegD); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vRegD_V6() -+%{ -+ constraint(ALLOC_IN_RC(v6_reg)); -+ match(RegD); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand vRegD_V7() -+%{ -+ constraint(ALLOC_IN_RC(v7_reg)); -+ match(RegD); -+ op_cost(0); -+ format %{ %} -+ interface(REG_INTER); -+%} -+ +operand vRegD_V8() +%{ + constraint(ALLOC_IN_RC(v8_reg)); diff 
--git a/8231441-3-AArch64-Initial-SVE-backend-support.patch b/8231441-3-AArch64-Initial-SVE-backend-support.patch index 3691f9dfefabf92ff666d4b6d844072bd2500fab..3418659180d38bd8521a98d070e1f2a0136ad5c6 100755 --- a/8231441-3-AArch64-Initial-SVE-backend-support.patch +++ b/8231441-3-AArch64-Initial-SVE-backend-support.patch @@ -1,8 +1,8 @@ diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk -index a39640526..2479853fa 100644 +index 2af2f9a..f23b972 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk -@@ -146,6 +146,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) +@@ -156,6 +156,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) ))) endif @@ -15,436 +15,709 @@ index a39640526..2479853fa 100644 ifeq ($(call check-jvm-feature, shenandoahgc), true) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ -diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad -index 64985e498..fa434df7d 100644 ---- a/src/hotspot/cpu/aarch64/aarch64.ad -+++ b/src/hotspot/cpu/aarch64/aarch64.ad -@@ -1991,6 +1991,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { - // branch if we need to invalidate the method later - __ nop(); - -+ if (UseSVE > 0 && C->max_vector_size() >= 16) { -+ __ reinitialize_ptrue(); -+ } -+ - int bangsize = C->bang_size_in_bytes(); - if (C->need_stack_bang(bangsize) && UseStackBanging) - __ generate_stack_overflow_check(bangsize); -@@ -2157,8 +2161,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo - - if (bottom_type()->isa_vect() != NULL) { - uint ireg = ideal_reg(); -- assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); -- if (cbuf) { -+ if (ireg == Op_VecA && cbuf) { -+ MacroAssembler _masm(cbuf); -+ int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); -+ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -+ // stack->stack -+ __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset, -+ sve_vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { -+ __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo), -+ sve_vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { -+ __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo), -+ sve_vector_reg_size_in_bytes); -+ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { -+ __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo]), -+ as_FloatRegister(Matcher::_regEncode[src_lo])); -+ } else { -+ ShouldNotReachHere(); -+ } -+ } else if (cbuf) { -+ assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); - MacroAssembler _masm(cbuf); - assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); - if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { -@@ -2442,15 +2466,28 @@ const bool Matcher::match_rule_supported(int opcode) { - return true; // Per default match rules are supported. - } - --const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { -- -- // TODO -- // identify extra cases that we might want to provide match rules for -- // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen -- bool ret_value = match_rule_supported(opcode); -- // Add rules here. -- -- return ret_value; // Per default match rules are supported. -+ // Identify extra cases that we might want to provide match rules for vector nodes and -+ // other intrinsics guarded with vector length (vlen) and element type (bt). -+ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { -+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { -+ return false; -+ } -+ int bit_size = vlen * type2aelembytes(bt) * 8; -+ if (UseSVE == 0 && bit_size > 128) { -+ return false; -+ } -+ if (UseSVE > 0) { -+ return op_sve_supported(opcode); -+ } else { // NEON -+ // Special cases -+ switch (opcode) { -+ case Op_MulVL: -+ return false; -+ default: -+ break; -+ } -+ } -+ return true; // Per default match rules are supported. - } - - const bool Matcher::has_predicated_vectors(void) { -@@ -3691,6 +3728,11 @@ encode %{ - if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); - return; -+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { -+ // Only non uncommon_trap calls need to reinitialize ptrue. -+ if (uncommon_trap_request() == 0) { -+ __ reinitialize_ptrue(); -+ } - } - // Emit stub for static call - address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); -@@ -3708,6 +3750,8 @@ encode %{ - if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); - return; -+ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { -+ __ reinitialize_ptrue(); - } - %} - -@@ -3744,6 +3788,9 @@ encode %{ - __ bind(retaddr); - __ add(sp, sp, 2 * wordSize); - } -+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { -+ __ reinitialize_ptrue(); -+ } - %} - - enc_class aarch64_enc_rethrow() %{ -@@ -3753,6 +3800,11 @@ encode %{ - - enc_class aarch64_enc_ret() %{ - MacroAssembler _masm(&cbuf); -+#ifdef ASSERT -+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { -+ __ verify_ptrue(); -+ } -+#endif - __ ret(lr); - %} - -@@ -4494,6 +4546,41 @@ operand immLoffset16() - interface(CONST_INTER); - %} - -+// 8 bit signed value. -+operand immI8() -+%{ -+ predicate(n->get_int() <= 127 && n->get_int() >= -128); -+ match(ConI); +diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py +index 31c6965..e621402 100644 +--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py ++++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py +@@ -73,6 +73,48 @@ class GeneralRegisterOrSp(Register): + return self.astr() + else: + return self.astr("r") ++class SVEVectorRegister(FloatRegister): ++ def __str__(self): ++ return self.astr("z") + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++class SVEPRegister(Register): ++ def __str__(self): ++ return self.astr("p") + -+// 8 bit signed value (simm8), or #simm8 LSL 8. -+operand immI8_shift8() -+%{ -+ predicate((n->get_int() <= 127 && n->get_int() >= -128) || -+ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0)); -+ match(ConI); ++ def generate(self): ++ self.number = random.randint(0, 15) ++ return self + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++class SVEGoverningPRegister(Register): ++ def __str__(self): ++ return self.astr("p") ++ def generate(self): ++ self.number = random.randint(0, 7) ++ return self + -+// 8 bit signed value (simm8), or #simm8 LSL 8. 
-+operand immL8_shift8() -+%{ -+ predicate((n->get_long() <= 127 && n->get_long() >= -128) || -+ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0)); -+ match(ConL); ++class RegVariant(object): ++ def __init__(self, low, high): ++ self.number = random.randint(low, high) + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++ def astr(self): ++ nameMap = { ++ 0: ".b", ++ 1: ".h", ++ 2: ".s", ++ 3: ".d", ++ 4: ".q" ++ } ++ return nameMap.get(self.number) + - // 32 bit integer valid for add sub immediate - operand immIAddSub() - %{ -@@ -16038,7 +16125,7 @@ instruct loadV8(vecD dst, vmem8 mem) - // Load Vector (128 bits) - instruct loadV16(vecX dst, vmem16 mem) - %{ -- predicate(n->as_LoadVector()->memory_size() == 16); -+ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); - match(Set dst (LoadVector mem)); - ins_cost(4 * INSN_COST); - format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} -@@ -16094,7 +16181,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src) - - instruct replicate16B(vecX dst, iRegIorL2I src) - %{ -- predicate(n->as_Vector()->length() == 16); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); - match(Set dst (ReplicateB src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (16B)" %} -@@ -16119,7 +16206,7 @@ instruct replicate8B_imm(vecD dst, immI con) - - instruct replicate16B_imm(vecX dst, immI con) - %{ -- predicate(n->as_Vector()->length() == 16); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); - match(Set dst (ReplicateB con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(16B)" %} -@@ -16144,7 +16231,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src) ++ def cstr(self): ++ nameMap = { ++ 0: "__ B", ++ 1: "__ H", ++ 2: "__ S", ++ 3: "__ D", ++ 4: "__ Q" ++ } ++ return nameMap.get(self.number) - instruct replicate8S(vecX dst, iRegIorL2I src) - %{ -- predicate(n->as_Vector()->length() == 8); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); - match(Set dst (ReplicateS src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (8S)" %} -@@ -16169,7 +16256,7 @@ instruct replicate4S_imm(vecD dst, immI con) + class FloatZero(Operand): - instruct replicate8S_imm(vecX dst, immI con) - %{ -- predicate(n->as_Vector()->length() == 8); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); - match(Set dst (ReplicateS con)); - ins_cost(INSN_COST); - format %{ "movi $dst, $con\t# vector(8H)" %} -@@ -16193,7 +16280,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src) +@@ -88,7 +130,10 @@ class OperandFactory: + 'w' : GeneralRegister, + 's' : FloatRegister, + 'd' : FloatRegister, +- 'z' : FloatZero} ++ 'z' : FloatZero, ++ 'p' : SVEPRegister, ++ 'P' : SVEGoverningPRegister, ++ 'Z' : SVEVectorRegister} - instruct replicate4I(vecX dst, iRegIorL2I src) - %{ -- predicate(n->as_Vector()->length() == 4); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateI src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (4I)" %} -@@ -16217,7 +16304,7 @@ instruct replicate2I_imm(vecD dst, immI con) + @classmethod + def create(cls, mode): +@@ -834,6 +879,100 @@ class FloatInstruction(Instruction): + % tuple([Instruction.astr(self)] + + [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)])) - instruct replicate4I_imm(vecX dst, immI con) - %{ -- predicate(n->as_Vector()->length() == 4); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateI con)); - ins_cost(INSN_COST); - format %{ 
"movi $dst, $con\t# vector(4I)" %} -@@ -16229,7 +16316,7 @@ instruct replicate4I_imm(vecX dst, immI con) - - instruct replicate2L(vecX dst, iRegL src) - %{ -- predicate(n->as_Vector()->length() == 2); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateL src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2L)" %} -@@ -16241,7 +16328,7 @@ instruct replicate2L(vecX dst, iRegL src) - - instruct replicate2L_zero(vecX dst, immI0 zero) - %{ -- predicate(n->as_Vector()->length() == 2); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateI zero)); - ins_cost(INSN_COST); - format %{ "movi $dst, $zero\t# vector(4I)" %} -@@ -16268,7 +16355,7 @@ instruct replicate2F(vecD dst, vRegF src) - - instruct replicate4F(vecX dst, vRegF src) - %{ -- predicate(n->as_Vector()->length() == 4); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); - match(Set dst (ReplicateF src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (4F)" %} -@@ -16281,7 +16368,7 @@ instruct replicate4F(vecX dst, vRegF src) - - instruct replicate2D(vecX dst, vRegD src) - %{ -- predicate(n->as_Vector()->length() == 2); -+ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); - match(Set dst (ReplicateD src)); - ins_cost(INSN_COST); - format %{ "dup $dst, $src\t# vector (2D)" %} -diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad -new file mode 100644 -index 000000000..8d80cb37a ---- /dev/null -+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad -@@ -0,0 +1,1366 @@ -+// -+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. -+// Copyright (c) 2020, Arm Limited. All rights reserved. -+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+// -+// This code is free software; you can redistribute it and/or modify it -+// under the terms of the GNU General Public License version 2 only, as -+// published by the Free Software Foundation. -+// -+// This code is distributed in the hope that it will be useful, but WITHOUT -+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+// version 2 for more details (a copy is included in the LICENSE file that -+// accompanied this code). -+// -+// You should have received a copy of the GNU General Public License version -+// 2 along with this work; if not, write to the Free Software Foundation, -+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+// -+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+// or visit www.oracle.com if you need additional information or have any -+// questions. -+// -+// ++class SVEVectorOp(Instruction): ++ def __init__(self, args): ++ name = args[0] ++ regTypes = args[1] ++ regs = [] ++ for c in regTypes: ++ regs.append(OperandFactory.create(c).generate()) ++ self.reg = regs ++ self.numRegs = len(regs) ++ if regTypes[0] != "p" and regTypes[1] == 'P': ++ self._isPredicated = True ++ self._merge = "/m" ++ else: ++ self._isPredicated = False ++ self._merge ="" + -+// This file is automatically generated by running "m4 aarch64_sve_ad.m4". 
Do not edit ---- ++ self._bitwiseop = False ++ if name[0] == 'f': ++ self._width = RegVariant(2, 3) ++ elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]): ++ self._width = RegVariant(3, 3) ++ self._bitwiseop = True ++ else: ++ self._width = RegVariant(0, 3) ++ if len(args) > 2: ++ self._dnm = args[2] ++ else: ++ self._dnm = None ++ Instruction.__init__(self, name) + -+// AArch64 SVE Architecture Description File ++ def cstr(self): ++ formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"]) ++ if self._bitwiseop: ++ width = [] ++ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"]) ++ else: ++ width = [self._width.cstr()] ++ return (formatStr ++ % tuple(["__ sve_" + self._name + "("] + ++ [str(self.reg[0])] + ++ width + ++ [str(self.reg[i]) for i in range(1, self.numRegs)])) ++ def astr(self): ++ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)]) ++ if self._dnm == 'dn': ++ formatStr += ", %s" ++ dnReg = [str(self.reg[0]) + self._width.astr()] ++ else: ++ dnReg = [] + ++ if self._isPredicated: ++ restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)] ++ else: ++ restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)] ++ return (formatStr ++ % tuple([Instruction.astr(self)] + ++ [str(self.reg[0]) + self._width.astr()] + ++ restRegs)) ++ def generate(self): ++ return self + -+// 4 bit signed offset -- for predicated load/store ++class SVEReductionOp(Instruction): ++ def __init__(self, args): ++ name = args[0] ++ lowRegType = args[1] ++ self.reg = [] ++ Instruction.__init__(self, name) ++ self.reg.append(OperandFactory.create('s').generate()) ++ self.reg.append(OperandFactory.create('P').generate()) ++ self.reg.append(OperandFactory.create('Z').generate()) ++ self._width = RegVariant(lowRegType, 3) ++ def cstr(self): ++ return "__ sve_%s(%s, %s, %s, %s);" % (self.name(), ++ str(self.reg[0]), ++ self._width.cstr(), ++ str(self.reg[1]), ++ str(self.reg[2])) ++ def astr(self): ++ if self.name() == "uaddv": ++ dstRegName = "d" + str(self.reg[0].number) ++ else: ++ dstRegName = self._width.astr()[1] + str(self.reg[0].number) ++ formatStr = "%s %s, %s, %s" ++ if self.name() == "fadda": ++ formatStr += ", %s" ++ moreReg = [dstRegName] ++ else: ++ moreReg = [] ++ return formatStr % tuple([self.name()] + ++ [dstRegName] + ++ [str(self.reg[1])] + ++ moreReg + ++ [str(self.reg[2]) + self._width.astr()]) + -+operand vmemA_immIOffset4() -+%{ -+ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4, -+ Matcher::scalable_vector_reg_size(T_BYTE))); -+ match(ConI); + class LdStSIMDOp(Instruction): + def __init__(self, args): + self._name, self.regnum, self.arrangement, self.addresskind = args +@@ -1120,7 +1259,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", + ["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"], + ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"], + ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"], +- ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]]) ++ ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"], ++ # SVE instructions ++ ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"], ++ ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"], ++ ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"], ++ ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, 
#7"], ++ ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"], ++ ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"], ++ ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"], ++ ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"], ++ ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"], ++ ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"], ++ ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"], ++ ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"], ++ ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"], ++ ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"], ++ ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"], ++ ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"], ++ ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"], ++ ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"], ++ ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"], ++ ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"], ++ ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"], ++ ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"], ++ ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"], ++ ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"], ++ ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"], ++ ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"], ++ ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"], ++ ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"], ++ ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"], ++ ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r8));", "st1d\t{z0.d}, p4, [x0, x8, LSL #3]"], ++ ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"], ++ ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"], ++ ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"], ++]) + + print "\n// FloatImmediateOp" + for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", +@@ -1145,6 +1319,50 @@ for size in ("x", "w"): + ["ldumin", "ldumin", size, suffix], + ["ldumax", "ldumax", size, suffix]]); + + -+ op_cost(0); -+ format %{ %} -+ interface(CONST_INTER); -+%} ++generate(SVEVectorOp, [["add", "ZZZ"], ++ ["sub", "ZZZ"], ++ ["fadd", "ZZZ"], ++ ["fmul", "ZZZ"], ++ ["fsub", "ZZZ"], ++ ["abs", "ZPZ"], ++ ["add", "ZPZ", "dn"], ++ ["asr", "ZPZ", "dn"], ++ ["cnt", "ZPZ"], ++ ["lsl", "ZPZ", "dn"], ++ ["lsr", "ZPZ", "dn"], ++ ["mul", "ZPZ", "dn"], ++ ["neg", "ZPZ"], ++ ["not", "ZPZ"], ++ ["smax", "ZPZ", "dn"], ++ ["smin", "ZPZ", "dn"], ++ ["sub", "ZPZ", "dn"], ++ ["fabs", "ZPZ"], ++ ["fadd", "ZPZ", "dn"], ++ ["fdiv", "ZPZ", "dn"], ++ ["fmax", "ZPZ", "dn"], ++ ["fmin", "ZPZ", "dn"], ++ ["fmul", "ZPZ", "dn"], ++ ["fneg", "ZPZ"], ++ ["frintm", "ZPZ"], ++ ["frintn", "ZPZ"], ++ ["frintp", "ZPZ"], ++ ["fsqrt", "ZPZ"], ++ ["fsub", "ZPZ", "dn"], ++ ["fmla", "ZPZZ"], ++ ["fmls", "ZPZZ"], ++ ["fnmla", "ZPZZ"], ++ ["fnmls", "ZPZZ"], ++ ["mla", "ZPZZ"], ++ ["mls", "ZPZZ"], ++ ["and", "ZZZ"], ++ ["eor", "ZZZ"], ++ ["orr", "ZZZ"], ++ ]) + -+operand vmemA_immLOffset4() 
++generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0], ++ ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]]) ++ + print "\n __ bind(forth);" + outfile.write("forth:\n") + +@@ -1153,8 +1371,8 @@ outfile.close() + import subprocess + import sys + +-# compile for 8.1 because of lse atomics +-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"]) ++# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension. ++subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) + + print + print "/*", +diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad +index f126488..8a92ff2 100644 +--- a/src/hotspot/cpu/aarch64/aarch64.ad ++++ b/src/hotspot/cpu/aarch64/aarch64.ad +@@ -2006,6 +2006,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + // branch if we need to invalidate the method later + __ nop(); + ++ if (UseSVE > 0 && C->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); ++ } ++ + int bangsize = C->bang_size_in_bytes(); + if (C->need_stack_bang(bangsize) && UseStackBanging) + __ generate_stack_overflow_check(bangsize); +@@ -2172,8 +2176,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); +- assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); +- if (cbuf) { ++ if (ireg == Op_VecA && cbuf) { ++ MacroAssembler _masm(cbuf); ++ int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { ++ // stack->stack ++ __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset, ++ sve_vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { ++ __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo), ++ sve_vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { ++ __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo), ++ sve_vector_reg_size_in_bytes); ++ } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { ++ __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo]), ++ as_FloatRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (cbuf) { ++ assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); + MacroAssembler _masm(cbuf); + assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { +@@ -2452,15 +2476,28 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { +- +- // TODO +- // identify extra cases that we might want to provide match rules for +- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen +- bool ret_value = match_rule_supported(opcode); +- // Add rules here. +- +- return ret_value; // Per default match rules are supported. ++ // Identify extra cases that we might want to provide match rules for vector nodes and ++ // other intrinsics guarded with vector length (vlen) and element type (bt). 
++ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { ++ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { ++ return false; ++ } ++ int bit_size = vlen * type2aelembytes(bt) * 8; ++ if (UseSVE == 0 && bit_size > 128) { ++ return false; ++ } ++ if (UseSVE > 0) { ++ return op_sve_supported(opcode); ++ } else { // NEON ++ // Special cases ++ switch (opcode) { ++ case Op_MulVL: ++ return false; ++ default: ++ break; ++ } ++ } ++ return true; // Per default match rules are supported. + } + + const bool Matcher::has_predicated_vectors(void) { +@@ -3812,6 +3849,12 @@ encode %{ + return; + } + } ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ // Only non uncommon_trap calls need to reinitialize ptrue. ++ if (uncommon_trap_request() == 0) { ++ __ reinitialize_ptrue(); ++ } ++ } + %} + + enc_class aarch64_enc_java_dynamic_call(method meth) %{ +@@ -3821,6 +3864,8 @@ encode %{ + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; ++ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); + } + %} + +@@ -3857,6 +3902,9 @@ encode %{ + __ bind(retaddr); + __ add(sp, sp, 2 * wordSize); + } ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); ++ } + %} + + enc_class aarch64_enc_rethrow() %{ +@@ -3866,6 +3914,11 @@ encode %{ + + enc_class aarch64_enc_ret() %{ + MacroAssembler _masm(&cbuf); ++#ifdef ASSERT ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ verify_ptrue(); ++ } ++#endif + __ ret(lr); + %} + +@@ -4607,6 +4660,41 @@ operand immLoffset16() + interface(CONST_INTER); + %} + ++// 8 bit signed value. ++operand immI8() +%{ -+ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4, -+ Matcher::scalable_vector_reg_size(T_BYTE))); -+ match(ConL); ++ predicate(n->get_int() <= 127 && n->get_int() >= -128); ++ match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + -+ -+operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off) ++// 8 bit signed value (simm8), or #simm8 LSL 8. ++operand immI8_shift8() +%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); -+ op_cost(0); -+ format %{ "[$reg, $off, MUL VL]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} -+%} ++ predicate((n->get_int() <= 127 && n->get_int() >= -128) || ++ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0)); ++ match(ConI); + -+operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) -+%{ -+ constraint(ALLOC_IN_RC(ptr_reg)); -+ match(AddP reg off); + op_cost(0); -+ format %{ "[$reg, $off, MUL VL]" %} -+ interface(MEMORY_INTER) %{ -+ base($reg); -+ index(0xffffffff); -+ scale(0x0); -+ disp($off); -+ %} ++ format %{ %} ++ interface(CONST_INTER); +%} + -+opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); ++// 8 bit signed value (simm8), or #simm8 LSL 8. 
++operand immL8_shift8() ++%{ ++ predicate((n->get_long() <= 127 && n->get_long() >= -128) || ++ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0)); ++ match(ConL); + -+source_hpp %{ -+ bool op_sve_supported(int opcode); ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); +%} + -+source %{ -+ -+ static inline BasicType vector_element_basic_type(const MachNode* n) { -+ const TypeVect* vt = n->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } -+ -+ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { -+ int def_idx = use->operand_index(opnd); -+ Node* def = use->in(def_idx); -+ const TypeVect* vt = def->bottom_type()->is_vect(); -+ return vt->element_basic_type(); -+ } -+ -+ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, -+ PRegister Pg, const Address &adr); -+ -+ // Predicated load/store, with optional ptrue to all elements of given predicate register. -+ static void loadStoreA_predicate(MacroAssembler masm, bool is_store, -+ FloatRegister reg, PRegister pg, BasicType bt, -+ int opcode, Register base, int index, int size, int disp) { -+ sve_mem_insn_predicate insn = NULL; -+ Assembler::SIMD_RegVariant type = Assembler::B; -+ int esize = type2aelembytes(bt); -+ if (index == -1) { -+ assert(size == 0, "unsupported address mode: scale size = %d", size); -+ switch(esize) { -+ case 1: -+ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b; -+ type = Assembler::B; -+ break; -+ case 2: -+ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h; -+ type = Assembler::H; -+ break; -+ case 4: -+ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w; -+ type = Assembler::S; -+ break; -+ case 8: -+ insn = is_store ? 
&MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d; -+ type = Assembler::D; -+ break; -+ default: -+ assert(false, "unsupported"); -+ ShouldNotReachHere(); -+ } -+ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); -+ } else { -+ assert(false, "unimplemented"); -+ ShouldNotReachHere(); -+ } -+ } -+ -+ bool op_sve_supported(int opcode) { -+ switch (opcode) { -+ // No multiply reduction instructions -+ case Op_MulReductionVD: -+ case Op_MulReductionVF: -+ case Op_MulReductionVI: -+ case Op_MulReductionVL: -+ // Others -+ case Op_Extract: -+ case Op_ExtractB: -+ case Op_ExtractC: -+ case Op_ExtractD: -+ case Op_ExtractF: + // 32 bit integer valid for add sub immediate + operand immIAddSub() + %{ +@@ -16433,7 +16521,7 @@ instruct loadV8(vecD dst, vmem8 mem) + // Load Vector (128 bits) + instruct loadV16(vecX dst, vmem16 mem) + %{ +- predicate(n->as_LoadVector()->memory_size() == 16); ++ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} +@@ -16489,7 +16577,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src) + + instruct replicate16B(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 16); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} +@@ -16514,7 +16602,7 @@ instruct replicate8B_imm(vecD dst, immI con) + + instruct replicate16B_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 16); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(16B)" %} +@@ -16539,7 +16627,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src) + + instruct replicate8S(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 8); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} +@@ -16564,7 +16652,7 @@ instruct replicate4S_imm(vecD dst, immI con) + + instruct replicate8S_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 8); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8H)" %} +@@ -16588,7 +16676,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src) + + instruct replicate4I(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} +@@ -16612,7 +16700,7 @@ instruct replicate2I_imm(vecD dst, immI con) + + instruct replicate4I_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4I)" %} +@@ -16624,7 +16712,7 @@ instruct replicate4I_imm(vecX dst, immI con) + + instruct replicate2L(vecX dst, iRegL src) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} +@@ -16636,7 +16724,7 @@ instruct replicate2L(vecX dst, iRegL src) + + 
instruct replicate2L_zero(vecX dst, immI0 zero) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector(4I)" %} +@@ -16663,7 +16751,7 @@ instruct replicate2F(vecD dst, vRegF src) + + instruct replicate4F(vecX dst, vRegF src) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} +@@ -16676,7 +16764,7 @@ instruct replicate4F(vecX dst, vRegF src) + + instruct replicate2D(vecX dst, vRegD src) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} +diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad +new file mode 100644 +index 0000000..8d80cb3 +--- /dev/null ++++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad +@@ -0,0 +1,1366 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// This file is automatically generated by running "m4 aarch64_sve_ad.m4". 
Do not edit ---- ++ ++// AArch64 SVE Architecture Description File ++ ++ ++// 4 bit signed offset -- for predicated load/store ++ ++operand vmemA_immIOffset4() ++%{ ++ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4, ++ Matcher::scalable_vector_reg_size(T_BYTE))); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand vmemA_immLOffset4() ++%{ ++ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4, ++ Matcher::scalable_vector_reg_size(T_BYTE))); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++ ++operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off, MUL VL]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off, MUL VL]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); ++ ++source_hpp %{ ++ bool op_sve_supported(int opcode); ++%} ++ ++source %{ ++ ++ static inline BasicType vector_element_basic_type(const MachNode* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { ++ int def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ const TypeVect* vt = def->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, ++ PRegister Pg, const Address &adr); ++ ++ // Predicated load/store, with optional ptrue to all elements of given predicate register. ++ static void loadStoreA_predicate(MacroAssembler masm, bool is_store, ++ FloatRegister reg, PRegister pg, BasicType bt, ++ int opcode, Register base, int index, int size, int disp) { ++ sve_mem_insn_predicate insn = NULL; ++ Assembler::SIMD_RegVariant type = Assembler::B; ++ int esize = type2aelembytes(bt); ++ if (index == -1) { ++ assert(size == 0, "unsupported address mode: scale size = %d", size); ++ switch(esize) { ++ case 1: ++ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b; ++ type = Assembler::B; ++ break; ++ case 2: ++ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h; ++ type = Assembler::H; ++ break; ++ case 4: ++ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w; ++ type = Assembler::S; ++ break; ++ case 8: ++ insn = is_store ? 
&MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d; ++ type = Assembler::D; ++ break; ++ default: ++ assert(false, "unsupported"); ++ ShouldNotReachHere(); ++ } ++ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); ++ } else { ++ assert(false, "unimplemented"); ++ ShouldNotReachHere(); ++ } ++ } ++ ++ bool op_sve_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: + case Op_ExtractI: + case Op_ExtractL: + case Op_ExtractS: @@ -1657,7 +1930,7 @@ index 000000000..8d80cb37a +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 new file mode 100644 -index 000000000..0323f2f8c +index 0000000..0323f2f --- /dev/null +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -0,0 +1,727 @@ @@ -2389,7 +2662,7 @@ index 000000000..0323f2f8c +BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub) +BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp -index 8047ed8fd..32e53336b 100644 +index 8047ed8..32e5333 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -96,662 +96,746 @@ void entry(CodeBuffer *cb) { @@ -4888,529 +5161,257 @@ index 8047ed8fd..32e53336b 100644 - 0x8811fdfa, 0x885f7dcd, 0x885fff4c, 0x889ffe28, - 0x88dfffd5, 0x48007d6f, 0x4811fc34, 0x485f7d1d, - 0x485ffd91, 0x489ffc8b, 0x48dffc90, 0x080e7c85, -- 0x081bfe11, 0x085f7f66, 0x085fff1b, 0x089ffe8a, -- 0x08dfff49, 0xc87f7b85, 0xc87fa66a, 0xc82b5590, -- 0xc82adc94, 0x887f0416, 0x887f8503, 0x88205fc9, -- 0x8837c560, 0xf81e1146, 0xb81fb007, 0x381f3205, -- 0x7801f27e, 0xf8477130, 0xb843b208, 0x385f918a, -- 0x785da12e, 0x389f83d8, 0x78817087, 0x78dd91d1, -- 0xb89e136b, 0xfc4410ec, 0xbc5fe200, 0xfc15f2ed, -- 0xbc1c2075, 0xf8064ca2, 0xb81a4c29, 0x381fbfdb, -- 0x7800cdfb, 0xf852ce24, 0xb841eef5, 0x385f9e2d, -- 0x785cec19, 0x389ebea1, 0x789caebc, 0x78c02c8b, -- 0xb883dd31, 0xfc427e7d, 0xbc5abed6, 0xfc11ff29, -- 0xbc1f1c49, 0xf81be6ed, 0xb800a611, 0x381e05c1, -- 0x78006411, 0xf855473b, 0xb85da72d, 0x385e372b, -- 0x784144be, 0x389f94e9, 0x789c2460, 0x78c1f5c7, -- 0xb8827771, 0xfc515491, 0xbc4226ba, 0xfc1c7625, -- 0xbc1935ad, 0xf824da06, 0xb834db09, 0x38237ba3, -- 0x783e6a2a, 0xf867497b, 0xb87949ee, 0x387379d8, -- 0x7866c810, 0x38acd98a, 0x78b0499a, 0x78ee781a, -- 0xb8bbf971, 0xfc73d803, 0xbc6979fa, 0xfc30e9ab, -- 0xbc355a7a, 0xf91886a8, 0xb918ef6a, 0x391b15db, -- 0x791ac0f0, 0xf958753b, 0xb95a1958, 0x395b3f18, -- 0x795800b4, 0x39988891, 0x799a81ae, 0x79dd172a, -- 0xb9981342, 0xfd5d21da, 0xbd5e7c9c, 0xfd1b526e, -- 0xbd18df97, 0x58002268, 0x18ffdf51, 0xf8951080, -- 0xd8000000, 0xf8a4c900, 0xf999e180, 0x1a150374, -- 0x3a060227, 0x5a1900c5, 0x7a0e017e, 0x9a0b0223, -- 0xba110159, 0xda170207, 0xfa050144, 0x0b2973c9, -- 0x2b30a8a0, 0xcb3b8baf, 0x6b21f12b, 0x8b264f02, -- 0xab3a70d3, 0xcb39ef48, 0xeb29329a, 0x3a5a41a7, -- 0x7a54310f, 0xba4302c8, 0xfa58a04a, 0x3a50490d, -- 0x7a4c0a01, 0xba5f79e3, 0xfa4c0aef, 0x1a9a30ee, -- 0x1a9ed763, 0x5a9702ab, 0x5a95c7da, 0x9a8d835c, -- 0x9a909471, 0xda8380ab, 0xda93c461, 0x5ac00120, -- 0x5ac005da, 0x5ac00a2d, 0x5ac0128b, 0x5ac0163c, -- 0xdac0008d, 0xdac007c1, 0xdac009cd, 0xdac00d05, -- 0xdac01322, 0xdac01514, 
0x1adb0b35, 0x1ad00d4d, -- 0x1ad1203c, 0x1aca26f9, 0x1ac72867, 0x1ace2fce, -- 0x9acf0acc, 0x9acd0f22, 0x9ad522e7, 0x9ac0258b, -- 0x9adc293e, 0x9ad62cad, 0x9bc47ea5, 0x9b477c51, -- 0x1b11318c, 0x1b01edfe, 0x9b117662, 0x9b03fae4, -- 0x9b313eef, 0x9b21b59b, 0x9bac45a6, 0x9ba6a839, -- 0x1e240871, 0x1e3518b0, 0x1e312b63, 0x1e2f3959, -- 0x1e200a2a, 0x1e630b5c, 0x1e7b1804, 0x1e6229dc, -- 0x1e773b4c, 0x1e610bcf, 0x1f0534a4, 0x1f1c85b5, -- 0x1f3d1c71, 0x1f3d6b37, 0x1f5e68ee, 0x1f4aa4f6, -- 0x1f6e24e7, 0x1f6f630e, 0x1e204056, 0x1e20c060, -- 0x1e214229, 0x1e21c178, 0x1e22c32f, 0x1e604064, -- 0x1e60c2da, 0x1e61427e, 0x1e61c1cc, 0x1e6240f1, -- 0x1e3801d8, 0x9e38034d, 0x1e780022, 0x9e780165, -- 0x1e22026e, 0x9e2202c1, 0x1e62023b, 0x9e620136, -- 0x1e26006e, 0x9e66022c, 0x1e270368, 0x9e67039d, -- 0x1e3e2000, 0x1e692180, 0x1e202148, 0x1e602328, -- 0x292e7b68, 0x294a4f15, 0x69626c50, 0xa93814d5, -- 0xa97e679d, 0x29903408, 0x29ec5039, 0x69fc62ce, -- 0xa98504d1, 0xa9fc4735, 0x28b05691, 0x28c8705c, -- 0x68e07953, 0xa8bf3e31, 0xa8fe0331, 0x283c170e, -- 0x284e4c37, 0xa80419cb, 0xa8722f62, 0x0c407230, -- 0x4cdfa13d, 0x0cd56f1e, 0x4cdf2440, 0x0d40c134, -- 0x4ddfc811, 0x0ddaced5, 0x4c408f33, 0x0cdf84aa, -- 0x4d60c30a, 0x0dffcbad, 0x4de2cf96, 0x4ccb489e, -- 0x0c40481d, 0x4d40e777, 0x4ddfe943, 0x0dd6edd3, -- 0x4cdf040e, 0x0cd902de, 0x0d60e019, 0x0dffe50a, -- 0x0dfce8c1, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, -+ 0x8b8e677b, 0xcb512964, 0xab998627, 0xeb9416cd, -+ 0x0b83438a, 0x4b463c55, 0x2b9b2406, 0x6b882b65, -+ 0x8a879c8c, 0xaa16cb75, 0xca80baa3, 0xea855955, -+ 0x0a1d5aad, 0x2a504951, 0x4a976cf0, 0x6a8c30ca, -+ 0x8a275b33, 0xaa27d459, 0xcab70ee9, 0xeaadc8c5, -+ 0x0a2a26af, 0x2abe06b1, 0x4a3d4f87, 0x6ab632d9, -+ 0x110c5346, 0x3107aa23, 0x5107eea5, 0x710dcf76, -+ 0x9103d10c, 0xb10e811d, 0xd10a087a, 0xf109d1fd, -+ 0x1209afd5, 0x32099d95, 0x5202c62b, 0x720897da, -+ 0x920e36f9, 0xb243f1de, 0xd263d09a, 0xf24fd01a, -+ 0x14000000, 0x17ffffd7, 0x1400023e, 0x94000000, -+ 0x97ffffd4, 0x9400023b, 0x3400001c, 0x34fffa3c, -+ 0x3400471c, 0x35000011, 0x35fff9d1, 0x350046b1, -+ 0xb4000019, 0xb4fff979, 0xb4004659, 0xb5000002, -+ 0xb5fff902, 0xb50045e2, 0x1000001d, 0x10fff8bd, -+ 0x1000459d, 0x9000001d, 0x36300006, 0x3637f826, -+ 0x36304506, 0x37100015, 0x3717f7d5, 0x371044b5, -+ 0x128155e8, 0x52a5762b, 0x72acb59a, 0x92866a8d, -+ 0xd2e2d8a6, 0xf2c54450, 0x93516bde, 0x330f3124, -+ 0x5301168f, 0x9353391b, 0xb355741e, 0xd3562f5b, -+ 0x13866d8c, 0x93d6b5b3, 0x54000000, 0x54fff5a0, -+ 0x54004280, 0x54000001, 0x54fff541, 0x54004221, -+ 0x54000002, 0x54fff4e2, 0x540041c2, 0x54000002, -+ 0x54fff482, 0x54004162, 0x54000003, 0x54fff423, -+ 0x54004103, 0x54000003, 0x54fff3c3, 0x540040a3, -+ 0x54000004, 0x54fff364, 0x54004044, 0x54000005, -+ 0x54fff305, 0x54003fe5, 0x54000006, 0x54fff2a6, -+ 0x54003f86, 0x54000007, 0x54fff247, 0x54003f27, -+ 0x54000008, 0x54fff1e8, 0x54003ec8, 0x54000009, -+ 0x54fff189, 0x54003e69, 0x5400000a, 0x54fff12a, -+ 0x54003e0a, 0x5400000b, 0x54fff0cb, 0x54003dab, -+ 0x5400000c, 0x54fff06c, 0x54003d4c, 0x5400000d, -+ 0x54fff00d, 0x54003ced, 0x5400000e, 0x54ffefae, -+ 0x54003c8e, 0x5400000f, 0x54ffef4f, 0x54003c2f, -+ 0xd407da81, 0xd402d542, 0xd406dae3, 0xd4258fa0, -+ 0xd44d5960, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, -+ 0xd5033fdf, 0xd503339f, 0xd50336bf, 0xd61f0160, -+ 0xd63f0320, 0xc80e7daf, 0xc81efc39, 0xc85f7c6d, -+ 0xc85ffea8, 0xc89fff8d, 0xc8dfffc8, 0x880d7f91, -+ 0x8815fe71, 0x885f7d03, 0x885ffebd, 0x889fff09, -+ 0x88dffcc2, 0x480c7e14, 0x4802fcbc, 0x485f7c61, -+ 0x485ffdb8, 0x489fff2f, 0x48dffe8a, 0x08057db0, -+ 
0x080afe2f, 0x085f7e71, 0x085ffd3e, 0x089fff14, -+ 0x08dffc8a, 0xc87f2139, 0xc87faa07, 0xc8392d30, -+ 0xc827a5e5, 0x887f106c, 0x887f88b1, 0x882460c8, -+ 0x8824e60c, 0xf800b3ce, 0xb819f3a6, 0x381f9162, -+ 0x781ea114, 0xf85e33b4, 0xb85e6009, 0x3940204e, -+ 0x785e802d, 0x389f922d, 0x789f50f1, 0x78dc4103, -+ 0xb9800d8e, 0xfc5152a5, 0xbc5ca009, 0xfc05f10f, -+ 0xbc1f0016, 0xf8111c97, 0xb8186c11, 0x381fbd3a, -+ 0x781f8dd5, 0xf8417ce8, 0xb8416d0c, 0x38406f9b, -+ 0x785c6e66, 0x389ecca7, 0x789e0e36, 0x78dfedb1, -+ 0xb8816c9d, 0xfc5b2f88, 0xbc5fbd77, 0xfc1e9e89, -+ 0xbc199c65, 0xf802044d, 0xb803967e, 0x3800343d, -+ 0x781ef74a, 0xf85f442f, 0xb85fa4a1, 0x385f25f8, -+ 0x785fb63d, 0x389ef5e4, 0x789ca446, 0x78c1277b, -+ 0xb89b3729, 0xfc5507b5, 0xbc5ce53e, 0xfc1d2582, -+ 0xbc1c56a7, 0xf837598c, 0xb8364bce, 0x383a586c, -+ 0x783e49cb, 0xf8787918, 0xb87469ac, 0x38655896, -+ 0x786658bc, 0x38b97962, 0x78b9ead7, 0x78f6da83, -+ 0xb8aefba9, 0xfc7dfaf0, 0xbc747b87, 0xfc387a94, -+ 0xbc377ab9, 0xf9180c51, 0xb91b38fe, 0x391ca4e3, -+ 0x791a4c27, 0xf95ca767, 0xb9580e28, 0x3958ea20, -+ 0x795bd680, 0x399a4633, 0x799d80d3, 0x79dcf944, -+ 0xb99b249d, 0xfd5a143d, 0xbd59938f, 0xfd1b9347, -+ 0xbd1aa7c0, 0x58000019, 0x18000009, 0xf88692c0, -+ 0xd8ffdf00, 0xf8be7b80, 0xf99c8260, 0x1a180111, -+ 0x3a09022e, 0x5a190036, 0x7a13012f, 0x9a0b028f, -+ 0xba1e0164, 0xda060114, 0xfa0f02aa, 0x0b298d61, -+ 0x2b3cee24, 0xcb3ca7b5, 0x6b37d38b, 0x8b25f34c, -+ 0xab3e68d1, 0xcb210a87, 0xeb3eed3e, 0x3a4b0087, -+ 0x7a4571eb, 0xba5122e6, 0xfa4bc16a, 0x3a4519cc, -+ 0x7a5c1aef, 0xba5e3a27, 0xfa4c8bc0, 0x1a81537a, -+ 0x1a95d56e, 0x5a8f60de, 0x5a995451, 0x9a8780b0, -+ 0x9a9cc68a, 0xda8180e6, 0xda912756, 0x5ac000cb, -+ 0x5ac00760, 0x5ac00ba1, 0x5ac012b4, 0x5ac0158c, -+ 0xdac00278, 0xdac005f7, 0xdac00831, 0xdac00c7b, -+ 0xdac010be, 0xdac0140f, 0x1ad4080e, 0x1ad50d9b, -+ 0x1ada214c, 0x1ac6266e, 0x1ade2a7b, 0x1ad02dc6, -+ 0x9ac209b1, 0x9ac20fa0, 0x9ac2220c, 0x9add26e9, -+ 0x9add2a26, 0x9ada2fce, 0x9bda7f11, 0x9b4e7f54, -+ 0x1b021d1b, 0x1b19b1bc, 0x9b0a6d24, 0x9b08f956, -+ 0x9b391694, 0x9b2beed6, 0x9bac4cc4, 0x9ba881f1, -+ 0x1e2a08b6, 0x1e301904, 0x1e262919, 0x1e393b66, -+ 0x1e290aea, 0x1e6c0a36, 0x1e74180b, 0x1e6f2980, -+ 0x1e643acf, 0x1e79083d, 0x1f131769, 0x1f06e87a, -+ 0x1f285184, 0x1f354539, 0x1f5e5867, 0x1f4aab61, -+ 0x1f760511, 0x1f626f8e, 0x1e2043db, 0x1e20c025, -+ 0x1e214277, 0x1e21c23c, 0x1e22c0d9, 0x1e6041d4, -+ 0x1e60c151, 0x1e61422a, 0x1e61c235, 0x1e6241f5, -+ 0x1e380167, 0x9e3803a2, 0x1e780323, 0x9e78011c, -+ 0x1e22006b, 0x9e2202a2, 0x1e62033d, 0x9e620073, -+ 0x1e2603b4, 0x9e660237, 0x1e270380, 0x9e670289, -+ 0x1e2c20e0, 0x1e6e21a0, 0x1e202188, 0x1e602028, -+ 0x29380acc, 0x2966271b, 0x696a130f, 0xa9015405, -+ 0xa9735d26, 0x29820fa0, 0x29ee403d, 0x69c24ebb, -+ 0xa9b545a6, 0xa9c16020, 0x288052c0, 0x28fa31d1, -+ 0x68ce682a, 0xa8ba61b4, 0xa8c330e1, 0x28362ae5, -+ 0x287a2b08, 0xa8043d6b, 0xa84470a9, 0x0c40728b, -+ 0x4cdfa113, 0x0cc36c43, 0x4cdf2475, 0x0d40c0ae, -+ 0x4ddfcb6d, 0x0dc0ce71, 0x4c408cbb, 0x0cdf849a, -+ 0x4d60c2e8, 0x0dffc94e, 0x4df3ceaa, 0x4cde49d1, -+ 0x0c404a94, 0x4d40e6b8, 0x4ddfe83a, 0x0dc0ec4c, -+ 0x4cdf04d5, 0x0cd60391, 0x0d60e333, 0x0dffe6e6, -+ 0x0dfae928, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, - 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, - 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1, - 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, -- 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, -- 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, -- 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, -- 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 
0x1e6e3000, -- 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, -- 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, -- 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, -- 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, -- 0xf83081f4, 0xf8220387, 0xf834132a, 0xf836204b, -- 0xf821326a, 0xf82e5075, 0xf83c41bb, 0xf83172be, -- 0xf83b63b0, 0xf8be8009, 0xf8bc039b, 0xf8b51159, -- 0xf8bf21f4, 0xf8a131d9, 0xf8b553ba, 0xf8a8433d, -- 0xf8ad7322, 0xf8af6017, 0xf8e38041, 0xf8fc0283, -- 0xf8ee11df, 0xf8e7205c, 0xf8e030ab, 0xf8eb528e, -- 0xf8ff4044, 0xf8fa72c0, 0xf8f161a1, 0xf877829a, -- 0xf86e018b, 0xf86c11ff, 0xf87b210e, 0xf86a333e, -- 0xf8765207, 0xf8614110, 0xf8617341, 0xf86061f7, -- 0xb82b8110, 0xb82101c7, 0xb830113f, 0xb83621a6, -- 0xb82b308d, 0xb8305016, 0xb83c415f, 0xb8307105, -- 0xb83a61f4, 0xb8bb8206, 0xb8bf005f, 0xb8b8111c, -- 0xb8af22e9, 0xb8ba30e2, 0xb8a351f1, 0xb8b342a5, -- 0xb8a7719a, 0xb8ac63a7, 0xb8e98288, 0xb8e803df, -- 0xb8e01186, 0xb8f12057, 0xb8e0303e, 0xb8f651e3, -- 0xb8f941b5, 0xb8ed7378, 0xb8f46163, 0xb86382ad, -- 0xb87a034f, 0xb8691053, 0xb87820fd, 0xb87d31f9, -- 0xb86b50fe, 0xb86b40c2, 0xb87071cb, 0xb8656168, -+ 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, -+ 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420, -+ 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff, -+ 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001, -+ 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, -+ 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, -+ 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0, -+ 0xe4a84fe0, 0xe5e85000, 0x858043e0, 0x85a043ff, -+ 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000, -+ 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000, -+ 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000, -+ 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, -+ 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000, -+ 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, -+ 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, -+ 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, -+ 0x1e7e3000, 0xf82b82af, 0xf83700a8, 0xf8271106, -+ 0xf82e22ee, 0xf82a3019, 0xf82552a9, 0xf824423b, -+ 0xf82a71a6, 0xf8236203, 0xf8a9805c, 0xf8b70022, -+ 0xf8a410fa, 0xf8a02143, 0xf8b83079, 0xf8ab5028, -+ 0xf8b043ad, 0xf8a670a0, 0xf8b061b1, 0xf8eb81db, -+ 0xf8e202ad, 0xf8f6119f, 0xf8e721fe, 0xf8e731f0, -+ 0xf8f051ba, 0xf8f74379, 0xf8e473ee, 0xf8f86221, -+ 0xf8628308, 0xf874027b, 0xf87310d1, 0xf86e235c, -+ 0xf8623270, 0xf86e5090, 0xf8794128, 0xf86a73a5, -+ 0xf86661c2, 0xb831808b, 0xb82701f0, 0xb82b1139, -+ 0xb823200e, 0xb820301e, 0xb826538a, 0xb82740ce, -+ 0xb826701e, 0xb83663be, 0xb8b0826e, 0xb8b50323, -+ 0xb8a21270, 0xb8ba22f4, 0xb8b133e6, 0xb8a553d7, -+ 0xb8ab41cc, 0xb8a271b4, 0xb8af6291, 0xb8e682fc, -+ 0xb8fb01b0, 0xb8e21317, 0xb8e0215c, 0xb8e330af, -+ 0xb8e353ab, 0xb8f640db, 0xb8f17214, 0xb8f760ef, -+ 0xb86881d0, 0xb87702f0, 0xb87c10ec, 0xb87c2267, -+ 0xb867316c, 0xb86a529f, 0xb86943e8, 0xb86a7048, -+ 0xb87163ff, 0x047600e2, 0x04be06de, 0x65d902ca, -+ 0x65cc0a17, 0x65d90623, 0x0496a099, 0x04401b57, -+ 0x04d08226, 0x04daac77, 0x04939d2b, 0x04919c7b, -+ 0x04901049, 0x0417a9f0, 0x04dea929, 0x048816ea, -+ 0x040a172d, 0x04811413, 0x04dca2d1, 0x65808a09, -+ 0x658d9411, 0x6586947d, 0x65878e21, 0x65c2880e, -+ 0x04ddb2d3, 0x65c2a5f1, 0x65c0b088, 0x65c1b3a5, -+ 0x65cda26b, 0x65c1938a, 0x65eb0ded, 0x65af3e86, -+ 0x65a749be, 0x65f379d6, 0x04404f3e, 0x04c16b0a, -+ 0x04363226, 0x04b1312a, 0x04753182, 0x049a39cf, -+ 0x04d82ce9, 0x0459353e, 0x04883347, 0x048a2fb4, -+ 0x65872e1c, 0x65c62d26, 0x6598346a, 0x04013915, - - }; - // END Generated code -- do not edit -diff --git 
a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py -index 31c6965b7..2211bd25a 100644 ---- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py -+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py -@@ -73,6 +73,48 @@ class GeneralRegisterOrSp(Register): - return self.astr() - else: - return self.astr("r") -+class SVEVectorRegister(FloatRegister): -+ def __str__(self): -+ return self.astr("z") -+ -+class SVEPRegister(Register): -+ def __str__(self): -+ return self.astr("p") -+ -+ def generate(self): -+ self.number = random.randint(0, 15) -+ return self -+ -+class SVEGoverningPRegister(Register): -+ def __str__(self): -+ return self.astr("p") -+ def generate(self): -+ self.number = random.randint(0, 7) -+ return self -+ -+class RegVariant(object): -+ def __init__(self, low, high): -+ self.number = random.randint(low, high) -+ -+ def astr(self): -+ nameMap = { -+ 0: ".b", -+ 1: ".h", -+ 2: ".s", -+ 3: ".d", -+ 4: ".q" -+ } -+ return nameMap.get(self.number) -+ -+ def cstr(self): -+ nameMap = { -+ 0: "__ B", -+ 1: "__ H", -+ 2: "__ S", -+ 3: "__ D", -+ 4: "__ Q" -+ } -+ return nameMap.get(self.number) - - class FloatZero(Operand): - -@@ -88,7 +130,10 @@ class OperandFactory: - 'w' : GeneralRegister, - 's' : FloatRegister, - 'd' : FloatRegister, -- 'z' : FloatZero} -+ 'z' : FloatZero, -+ 'p' : SVEPRegister, -+ 'P' : SVEGoverningPRegister, -+ 'Z' : SVEVectorRegister} - - @classmethod - def create(cls, mode): -@@ -834,6 +879,100 @@ class FloatInstruction(Instruction): - % tuple([Instruction.astr(self)] + - [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)])) - -+class SVEVectorOp(Instruction): -+ def __init__(self, args): -+ name = args[0] -+ regTypes = args[1] -+ regs = [] -+ for c in regTypes: -+ regs.append(OperandFactory.create(c).generate()) -+ self.reg = regs -+ self.numRegs = len(regs) -+ if regTypes[0] != "p" and regTypes[1] == 'P': -+ self._isPredicated = True -+ self._merge = "/m" -+ else: -+ self._isPredicated = False -+ self._merge ="" -+ -+ self._bitwiseop = False -+ if name[0] == 'f': -+ self._width = RegVariant(2, 3) -+ elif not self._isPredicated and (name in ["and", "eor", "orr", "bic"]): -+ self._width = RegVariant(3, 3) -+ self._bitwiseop = True -+ else: -+ self._width = RegVariant(0, 3) -+ if len(args) > 2: -+ self._dnm = args[2] -+ else: -+ self._dnm = None -+ Instruction.__init__(self, name) -+ -+ def cstr(self): -+ formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"]) -+ if self._bitwiseop: -+ width = [] -+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"]) -+ else: -+ width = [self._width.cstr()] -+ return (formatStr -+ % tuple(["__ sve_" + self._name + "("] + -+ [str(self.reg[0])] + -+ width + -+ [str(self.reg[i]) for i in range(1, self.numRegs)])) -+ def astr(self): -+ formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)]) -+ if self._dnm == 'dn': -+ formatStr += ", %s" -+ dnReg = [str(self.reg[0]) + self._width.astr()] -+ else: -+ dnReg = [] -+ -+ if self._isPredicated: -+ restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)] -+ else: -+ restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)] -+ return (formatStr -+ % tuple([Instruction.astr(self)] + -+ [str(self.reg[0]) + self._width.astr()] + -+ restRegs)) -+ def generate(self): -+ return self -+ -+class SVEReductionOp(Instruction): -+ def __init__(self, args): -+ name = args[0] -+ lowRegType = args[1] 
-+ self.reg = [] -+ Instruction.__init__(self, name) -+ self.reg.append(OperandFactory.create('s').generate()) -+ self.reg.append(OperandFactory.create('P').generate()) -+ self.reg.append(OperandFactory.create('Z').generate()) -+ self._width = RegVariant(lowRegType, 3) -+ def cstr(self): -+ return "__ sve_%s(%s, %s, %s, %s);" % (self.name(), -+ str(self.reg[0]), -+ self._width.cstr(), -+ str(self.reg[1]), -+ str(self.reg[2])) -+ def astr(self): -+ if self.name() == "uaddv": -+ dstRegName = "d" + str(self.reg[0].number) -+ else: -+ dstRegName = self._width.astr()[1] + str(self.reg[0].number) -+ formatStr = "%s %s, %s, %s" -+ if self.name() == "fadda": -+ formatStr += ", %s" -+ moreReg = [dstRegName] -+ else: -+ moreReg = [] -+ return formatStr % tuple([self.name()] + -+ [dstRegName] + -+ [str(self.reg[1])] + -+ moreReg + -+ [str(self.reg[2]) + self._width.astr()]) -+ - class LdStSIMDOp(Instruction): - def __init__(self, args): - self._name, self.regnum, self.arrangement, self.addresskind = args -@@ -1120,7 +1259,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", - ["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"], - ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"], - ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"], -- ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]]) -+ ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"], -+ # SVE instructions -+ ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"], -+ ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"], -+ ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"], -+ ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"], -+ ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"], -+ ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"], -+ ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"], -+ ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"], -+ ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"], -+ ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"], -+ ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"], -+ ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"], -+ ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"], -+ ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"], -+ ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"], -+ ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"], -+ ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"], -+ ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"], -+ ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"], -+ ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"], -+ ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"], -+ ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"], -+ ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"], -+ ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"], -+ ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"], -+ ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"], -+ ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"], 
-+ ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"], -+ ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"], -+ ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r8));", "st1d\t{z0.d}, p4, [x0, x8, LSL #3]"], -+ ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"], -+ ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"], -+ ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"], -+]) - - print "\n// FloatImmediateOp" - for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", -@@ -1145,6 +1319,50 @@ for size in ("x", "w"): - ["ldumin", "ldumin", size, suffix], - ["ldumax", "ldumax", size, suffix]]); - -+ -+generate(SVEVectorOp, [["add", "ZZZ"], -+ ["sub", "ZZZ"], -+ ["fadd", "ZZZ"], -+ ["fmul", "ZZZ"], -+ ["fsub", "ZZZ"], -+ ["abs", "ZPZ"], -+ ["add", "ZPZ", "dn"], -+ ["asr", "ZPZ", "dn"], -+ ["cnt", "ZPZ"], -+ ["lsl", "ZPZ", "dn"], -+ ["lsr", "ZPZ", "dn"], -+ ["mul", "ZPZ", "dn"], -+ ["neg", "ZPZ"], -+ ["not", "ZPZ"], -+ ["smax", "ZPZ", "dn"], -+ ["smin", "ZPZ", "dn"], -+ ["sub", "ZPZ", "dn"], -+ ["fabs", "ZPZ"], -+ ["fadd", "ZPZ", "dn"], -+ ["fdiv", "ZPZ", "dn"], -+ ["fmax", "ZPZ", "dn"], -+ ["fmin", "ZPZ", "dn"], -+ ["fmul", "ZPZ", "dn"], -+ ["fneg", "ZPZ"], -+ ["frintm", "ZPZ"], -+ ["frintn", "ZPZ"], -+ ["frintp", "ZPZ"], -+ ["fsqrt", "ZPZ"], -+ ["fsub", "ZPZ", "dn"], -+ ["fmla", "ZPZZ"], -+ ["fmls", "ZPZZ"], -+ ["fnmla", "ZPZZ"], -+ ["fnmls", "ZPZZ"], -+ ["mla", "ZPZZ"], -+ ["mls", "ZPZZ"], -+ ["and", "ZZZ"], -+ ["eor", "ZZZ"], -+ ["orr", "ZZZ"], -+ ]) -+ -+generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0], -+ ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]]) -+ - print "\n __ bind(forth);" - outfile.write("forth:\n") - -@@ -1153,8 +1372,8 @@ outfile.close() - import subprocess - import sys - --# compile for 8.1 because of lse atomics --subprocess.check_call([AARCH64_AS, "-march=armv8.1-a", "aarch64ops.s", "-o", "aarch64ops.o"]) -+# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension. 
-+subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) +- 0x081bfe11, 0x085f7f66, 0x085fff1b, 0x089ffe8a, +- 0x08dfff49, 0xc87f7b85, 0xc87fa66a, 0xc82b5590, +- 0xc82adc94, 0x887f0416, 0x887f8503, 0x88205fc9, +- 0x8837c560, 0xf81e1146, 0xb81fb007, 0x381f3205, +- 0x7801f27e, 0xf8477130, 0xb843b208, 0x385f918a, +- 0x785da12e, 0x389f83d8, 0x78817087, 0x78dd91d1, +- 0xb89e136b, 0xfc4410ec, 0xbc5fe200, 0xfc15f2ed, +- 0xbc1c2075, 0xf8064ca2, 0xb81a4c29, 0x381fbfdb, +- 0x7800cdfb, 0xf852ce24, 0xb841eef5, 0x385f9e2d, +- 0x785cec19, 0x389ebea1, 0x789caebc, 0x78c02c8b, +- 0xb883dd31, 0xfc427e7d, 0xbc5abed6, 0xfc11ff29, +- 0xbc1f1c49, 0xf81be6ed, 0xb800a611, 0x381e05c1, +- 0x78006411, 0xf855473b, 0xb85da72d, 0x385e372b, +- 0x784144be, 0x389f94e9, 0x789c2460, 0x78c1f5c7, +- 0xb8827771, 0xfc515491, 0xbc4226ba, 0xfc1c7625, +- 0xbc1935ad, 0xf824da06, 0xb834db09, 0x38237ba3, +- 0x783e6a2a, 0xf867497b, 0xb87949ee, 0x387379d8, +- 0x7866c810, 0x38acd98a, 0x78b0499a, 0x78ee781a, +- 0xb8bbf971, 0xfc73d803, 0xbc6979fa, 0xfc30e9ab, +- 0xbc355a7a, 0xf91886a8, 0xb918ef6a, 0x391b15db, +- 0x791ac0f0, 0xf958753b, 0xb95a1958, 0x395b3f18, +- 0x795800b4, 0x39988891, 0x799a81ae, 0x79dd172a, +- 0xb9981342, 0xfd5d21da, 0xbd5e7c9c, 0xfd1b526e, +- 0xbd18df97, 0x58002268, 0x18ffdf51, 0xf8951080, +- 0xd8000000, 0xf8a4c900, 0xf999e180, 0x1a150374, +- 0x3a060227, 0x5a1900c5, 0x7a0e017e, 0x9a0b0223, +- 0xba110159, 0xda170207, 0xfa050144, 0x0b2973c9, +- 0x2b30a8a0, 0xcb3b8baf, 0x6b21f12b, 0x8b264f02, +- 0xab3a70d3, 0xcb39ef48, 0xeb29329a, 0x3a5a41a7, +- 0x7a54310f, 0xba4302c8, 0xfa58a04a, 0x3a50490d, +- 0x7a4c0a01, 0xba5f79e3, 0xfa4c0aef, 0x1a9a30ee, +- 0x1a9ed763, 0x5a9702ab, 0x5a95c7da, 0x9a8d835c, +- 0x9a909471, 0xda8380ab, 0xda93c461, 0x5ac00120, +- 0x5ac005da, 0x5ac00a2d, 0x5ac0128b, 0x5ac0163c, +- 0xdac0008d, 0xdac007c1, 0xdac009cd, 0xdac00d05, +- 0xdac01322, 0xdac01514, 0x1adb0b35, 0x1ad00d4d, +- 0x1ad1203c, 0x1aca26f9, 0x1ac72867, 0x1ace2fce, +- 0x9acf0acc, 0x9acd0f22, 0x9ad522e7, 0x9ac0258b, +- 0x9adc293e, 0x9ad62cad, 0x9bc47ea5, 0x9b477c51, +- 0x1b11318c, 0x1b01edfe, 0x9b117662, 0x9b03fae4, +- 0x9b313eef, 0x9b21b59b, 0x9bac45a6, 0x9ba6a839, +- 0x1e240871, 0x1e3518b0, 0x1e312b63, 0x1e2f3959, +- 0x1e200a2a, 0x1e630b5c, 0x1e7b1804, 0x1e6229dc, +- 0x1e773b4c, 0x1e610bcf, 0x1f0534a4, 0x1f1c85b5, +- 0x1f3d1c71, 0x1f3d6b37, 0x1f5e68ee, 0x1f4aa4f6, +- 0x1f6e24e7, 0x1f6f630e, 0x1e204056, 0x1e20c060, +- 0x1e214229, 0x1e21c178, 0x1e22c32f, 0x1e604064, +- 0x1e60c2da, 0x1e61427e, 0x1e61c1cc, 0x1e6240f1, +- 0x1e3801d8, 0x9e38034d, 0x1e780022, 0x9e780165, +- 0x1e22026e, 0x9e2202c1, 0x1e62023b, 0x9e620136, +- 0x1e26006e, 0x9e66022c, 0x1e270368, 0x9e67039d, +- 0x1e3e2000, 0x1e692180, 0x1e202148, 0x1e602328, +- 0x292e7b68, 0x294a4f15, 0x69626c50, 0xa93814d5, +- 0xa97e679d, 0x29903408, 0x29ec5039, 0x69fc62ce, +- 0xa98504d1, 0xa9fc4735, 0x28b05691, 0x28c8705c, +- 0x68e07953, 0xa8bf3e31, 0xa8fe0331, 0x283c170e, +- 0x284e4c37, 0xa80419cb, 0xa8722f62, 0x0c407230, +- 0x4cdfa13d, 0x0cd56f1e, 0x4cdf2440, 0x0d40c134, +- 0x4ddfc811, 0x0ddaced5, 0x4c408f33, 0x0cdf84aa, +- 0x4d60c30a, 0x0dffcbad, 0x4de2cf96, 0x4ccb489e, +- 0x0c40481d, 0x4d40e777, 0x4ddfe943, 0x0dd6edd3, +- 0x4cdf040e, 0x0cd902de, 0x0d60e019, 0x0dffe50a, +- 0x0dfce8c1, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, ++ 0x8b8e677b, 0xcb512964, 0xab998627, 0xeb9416cd, ++ 0x0b83438a, 0x4b463c55, 0x2b9b2406, 0x6b882b65, ++ 0x8a879c8c, 0xaa16cb75, 0xca80baa3, 0xea855955, ++ 0x0a1d5aad, 0x2a504951, 0x4a976cf0, 0x6a8c30ca, ++ 0x8a275b33, 0xaa27d459, 
0xcab70ee9, 0xeaadc8c5, ++ 0x0a2a26af, 0x2abe06b1, 0x4a3d4f87, 0x6ab632d9, ++ 0x110c5346, 0x3107aa23, 0x5107eea5, 0x710dcf76, ++ 0x9103d10c, 0xb10e811d, 0xd10a087a, 0xf109d1fd, ++ 0x1209afd5, 0x32099d95, 0x5202c62b, 0x720897da, ++ 0x920e36f9, 0xb243f1de, 0xd263d09a, 0xf24fd01a, ++ 0x14000000, 0x17ffffd7, 0x1400023e, 0x94000000, ++ 0x97ffffd4, 0x9400023b, 0x3400001c, 0x34fffa3c, ++ 0x3400471c, 0x35000011, 0x35fff9d1, 0x350046b1, ++ 0xb4000019, 0xb4fff979, 0xb4004659, 0xb5000002, ++ 0xb5fff902, 0xb50045e2, 0x1000001d, 0x10fff8bd, ++ 0x1000459d, 0x9000001d, 0x36300006, 0x3637f826, ++ 0x36304506, 0x37100015, 0x3717f7d5, 0x371044b5, ++ 0x128155e8, 0x52a5762b, 0x72acb59a, 0x92866a8d, ++ 0xd2e2d8a6, 0xf2c54450, 0x93516bde, 0x330f3124, ++ 0x5301168f, 0x9353391b, 0xb355741e, 0xd3562f5b, ++ 0x13866d8c, 0x93d6b5b3, 0x54000000, 0x54fff5a0, ++ 0x54004280, 0x54000001, 0x54fff541, 0x54004221, ++ 0x54000002, 0x54fff4e2, 0x540041c2, 0x54000002, ++ 0x54fff482, 0x54004162, 0x54000003, 0x54fff423, ++ 0x54004103, 0x54000003, 0x54fff3c3, 0x540040a3, ++ 0x54000004, 0x54fff364, 0x54004044, 0x54000005, ++ 0x54fff305, 0x54003fe5, 0x54000006, 0x54fff2a6, ++ 0x54003f86, 0x54000007, 0x54fff247, 0x54003f27, ++ 0x54000008, 0x54fff1e8, 0x54003ec8, 0x54000009, ++ 0x54fff189, 0x54003e69, 0x5400000a, 0x54fff12a, ++ 0x54003e0a, 0x5400000b, 0x54fff0cb, 0x54003dab, ++ 0x5400000c, 0x54fff06c, 0x54003d4c, 0x5400000d, ++ 0x54fff00d, 0x54003ced, 0x5400000e, 0x54ffefae, ++ 0x54003c8e, 0x5400000f, 0x54ffef4f, 0x54003c2f, ++ 0xd407da81, 0xd402d542, 0xd406dae3, 0xd4258fa0, ++ 0xd44d5960, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, ++ 0xd5033fdf, 0xd503339f, 0xd50336bf, 0xd61f0160, ++ 0xd63f0320, 0xc80e7daf, 0xc81efc39, 0xc85f7c6d, ++ 0xc85ffea8, 0xc89fff8d, 0xc8dfffc8, 0x880d7f91, ++ 0x8815fe71, 0x885f7d03, 0x885ffebd, 0x889fff09, ++ 0x88dffcc2, 0x480c7e14, 0x4802fcbc, 0x485f7c61, ++ 0x485ffdb8, 0x489fff2f, 0x48dffe8a, 0x08057db0, ++ 0x080afe2f, 0x085f7e71, 0x085ffd3e, 0x089fff14, ++ 0x08dffc8a, 0xc87f2139, 0xc87faa07, 0xc8392d30, ++ 0xc827a5e5, 0x887f106c, 0x887f88b1, 0x882460c8, ++ 0x8824e60c, 0xf800b3ce, 0xb819f3a6, 0x381f9162, ++ 0x781ea114, 0xf85e33b4, 0xb85e6009, 0x3940204e, ++ 0x785e802d, 0x389f922d, 0x789f50f1, 0x78dc4103, ++ 0xb9800d8e, 0xfc5152a5, 0xbc5ca009, 0xfc05f10f, ++ 0xbc1f0016, 0xf8111c97, 0xb8186c11, 0x381fbd3a, ++ 0x781f8dd5, 0xf8417ce8, 0xb8416d0c, 0x38406f9b, ++ 0x785c6e66, 0x389ecca7, 0x789e0e36, 0x78dfedb1, ++ 0xb8816c9d, 0xfc5b2f88, 0xbc5fbd77, 0xfc1e9e89, ++ 0xbc199c65, 0xf802044d, 0xb803967e, 0x3800343d, ++ 0x781ef74a, 0xf85f442f, 0xb85fa4a1, 0x385f25f8, ++ 0x785fb63d, 0x389ef5e4, 0x789ca446, 0x78c1277b, ++ 0xb89b3729, 0xfc5507b5, 0xbc5ce53e, 0xfc1d2582, ++ 0xbc1c56a7, 0xf837598c, 0xb8364bce, 0x383a586c, ++ 0x783e49cb, 0xf8787918, 0xb87469ac, 0x38655896, ++ 0x786658bc, 0x38b97962, 0x78b9ead7, 0x78f6da83, ++ 0xb8aefba9, 0xfc7dfaf0, 0xbc747b87, 0xfc387a94, ++ 0xbc377ab9, 0xf9180c51, 0xb91b38fe, 0x391ca4e3, ++ 0x791a4c27, 0xf95ca767, 0xb9580e28, 0x3958ea20, ++ 0x795bd680, 0x399a4633, 0x799d80d3, 0x79dcf944, ++ 0xb99b249d, 0xfd5a143d, 0xbd59938f, 0xfd1b9347, ++ 0xbd1aa7c0, 0x58000019, 0x18000009, 0xf88692c0, ++ 0xd8ffdf00, 0xf8be7b80, 0xf99c8260, 0x1a180111, ++ 0x3a09022e, 0x5a190036, 0x7a13012f, 0x9a0b028f, ++ 0xba1e0164, 0xda060114, 0xfa0f02aa, 0x0b298d61, ++ 0x2b3cee24, 0xcb3ca7b5, 0x6b37d38b, 0x8b25f34c, ++ 0xab3e68d1, 0xcb210a87, 0xeb3eed3e, 0x3a4b0087, ++ 0x7a4571eb, 0xba5122e6, 0xfa4bc16a, 0x3a4519cc, ++ 0x7a5c1aef, 0xba5e3a27, 0xfa4c8bc0, 0x1a81537a, ++ 0x1a95d56e, 0x5a8f60de, 0x5a995451, 0x9a8780b0, ++ 
0x9a9cc68a, 0xda8180e6, 0xda912756, 0x5ac000cb, ++ 0x5ac00760, 0x5ac00ba1, 0x5ac012b4, 0x5ac0158c, ++ 0xdac00278, 0xdac005f7, 0xdac00831, 0xdac00c7b, ++ 0xdac010be, 0xdac0140f, 0x1ad4080e, 0x1ad50d9b, ++ 0x1ada214c, 0x1ac6266e, 0x1ade2a7b, 0x1ad02dc6, ++ 0x9ac209b1, 0x9ac20fa0, 0x9ac2220c, 0x9add26e9, ++ 0x9add2a26, 0x9ada2fce, 0x9bda7f11, 0x9b4e7f54, ++ 0x1b021d1b, 0x1b19b1bc, 0x9b0a6d24, 0x9b08f956, ++ 0x9b391694, 0x9b2beed6, 0x9bac4cc4, 0x9ba881f1, ++ 0x1e2a08b6, 0x1e301904, 0x1e262919, 0x1e393b66, ++ 0x1e290aea, 0x1e6c0a36, 0x1e74180b, 0x1e6f2980, ++ 0x1e643acf, 0x1e79083d, 0x1f131769, 0x1f06e87a, ++ 0x1f285184, 0x1f354539, 0x1f5e5867, 0x1f4aab61, ++ 0x1f760511, 0x1f626f8e, 0x1e2043db, 0x1e20c025, ++ 0x1e214277, 0x1e21c23c, 0x1e22c0d9, 0x1e6041d4, ++ 0x1e60c151, 0x1e61422a, 0x1e61c235, 0x1e6241f5, ++ 0x1e380167, 0x9e3803a2, 0x1e780323, 0x9e78011c, ++ 0x1e22006b, 0x9e2202a2, 0x1e62033d, 0x9e620073, ++ 0x1e2603b4, 0x9e660237, 0x1e270380, 0x9e670289, ++ 0x1e2c20e0, 0x1e6e21a0, 0x1e202188, 0x1e602028, ++ 0x29380acc, 0x2966271b, 0x696a130f, 0xa9015405, ++ 0xa9735d26, 0x29820fa0, 0x29ee403d, 0x69c24ebb, ++ 0xa9b545a6, 0xa9c16020, 0x288052c0, 0x28fa31d1, ++ 0x68ce682a, 0xa8ba61b4, 0xa8c330e1, 0x28362ae5, ++ 0x287a2b08, 0xa8043d6b, 0xa84470a9, 0x0c40728b, ++ 0x4cdfa113, 0x0cc36c43, 0x4cdf2475, 0x0d40c0ae, ++ 0x4ddfcb6d, 0x0dc0ce71, 0x4c408cbb, 0x0cdf849a, ++ 0x4d60c2e8, 0x0dffc94e, 0x4df3ceaa, 0x4cde49d1, ++ 0x0c404a94, 0x4d40e6b8, 0x4ddfe83a, 0x0dc0ec4c, ++ 0x4cdf04d5, 0x0cd60391, 0x0d60e333, 0x0dffe6e6, ++ 0x0dfae928, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, + 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, + 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1, + 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, +- 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, +- 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, +- 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, +- 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, +- 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, +- 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, +- 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, +- 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, +- 0xf83081f4, 0xf8220387, 0xf834132a, 0xf836204b, +- 0xf821326a, 0xf82e5075, 0xf83c41bb, 0xf83172be, +- 0xf83b63b0, 0xf8be8009, 0xf8bc039b, 0xf8b51159, +- 0xf8bf21f4, 0xf8a131d9, 0xf8b553ba, 0xf8a8433d, +- 0xf8ad7322, 0xf8af6017, 0xf8e38041, 0xf8fc0283, +- 0xf8ee11df, 0xf8e7205c, 0xf8e030ab, 0xf8eb528e, +- 0xf8ff4044, 0xf8fa72c0, 0xf8f161a1, 0xf877829a, +- 0xf86e018b, 0xf86c11ff, 0xf87b210e, 0xf86a333e, +- 0xf8765207, 0xf8614110, 0xf8617341, 0xf86061f7, +- 0xb82b8110, 0xb82101c7, 0xb830113f, 0xb83621a6, +- 0xb82b308d, 0xb8305016, 0xb83c415f, 0xb8307105, +- 0xb83a61f4, 0xb8bb8206, 0xb8bf005f, 0xb8b8111c, +- 0xb8af22e9, 0xb8ba30e2, 0xb8a351f1, 0xb8b342a5, +- 0xb8a7719a, 0xb8ac63a7, 0xb8e98288, 0xb8e803df, +- 0xb8e01186, 0xb8f12057, 0xb8e0303e, 0xb8f651e3, +- 0xb8f941b5, 0xb8ed7378, 0xb8f46163, 0xb86382ad, +- 0xb87a034f, 0xb8691053, 0xb87820fd, 0xb87d31f9, +- 0xb86b50fe, 0xb86b40c2, 0xb87071cb, 0xb8656168, ++ 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, ++ 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420, ++ 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff, ++ 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001, ++ 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, ++ 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, ++ 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0, ++ 0xe4a84fe0, 0xe5e85000, 0x858043e0, 0x85a043ff, ++ 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000, ++ 0x1e623000, 0x1e641000, 0x1e643000, 
0x1e661000, ++ 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000, ++ 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, ++ 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000, ++ 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, ++ 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, ++ 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, ++ 0x1e7e3000, 0xf82b82af, 0xf83700a8, 0xf8271106, ++ 0xf82e22ee, 0xf82a3019, 0xf82552a9, 0xf824423b, ++ 0xf82a71a6, 0xf8236203, 0xf8a9805c, 0xf8b70022, ++ 0xf8a410fa, 0xf8a02143, 0xf8b83079, 0xf8ab5028, ++ 0xf8b043ad, 0xf8a670a0, 0xf8b061b1, 0xf8eb81db, ++ 0xf8e202ad, 0xf8f6119f, 0xf8e721fe, 0xf8e731f0, ++ 0xf8f051ba, 0xf8f74379, 0xf8e473ee, 0xf8f86221, ++ 0xf8628308, 0xf874027b, 0xf87310d1, 0xf86e235c, ++ 0xf8623270, 0xf86e5090, 0xf8794128, 0xf86a73a5, ++ 0xf86661c2, 0xb831808b, 0xb82701f0, 0xb82b1139, ++ 0xb823200e, 0xb820301e, 0xb826538a, 0xb82740ce, ++ 0xb826701e, 0xb83663be, 0xb8b0826e, 0xb8b50323, ++ 0xb8a21270, 0xb8ba22f4, 0xb8b133e6, 0xb8a553d7, ++ 0xb8ab41cc, 0xb8a271b4, 0xb8af6291, 0xb8e682fc, ++ 0xb8fb01b0, 0xb8e21317, 0xb8e0215c, 0xb8e330af, ++ 0xb8e353ab, 0xb8f640db, 0xb8f17214, 0xb8f760ef, ++ 0xb86881d0, 0xb87702f0, 0xb87c10ec, 0xb87c2267, ++ 0xb867316c, 0xb86a529f, 0xb86943e8, 0xb86a7048, ++ 0xb87163ff, 0x047600e2, 0x04be06de, 0x65d902ca, ++ 0x65cc0a17, 0x65d90623, 0x0496a099, 0x04401b57, ++ 0x04d08226, 0x04daac77, 0x04939d2b, 0x04919c7b, ++ 0x04901049, 0x0417a9f0, 0x04dea929, 0x048816ea, ++ 0x040a172d, 0x04811413, 0x04dca2d1, 0x65808a09, ++ 0x658d9411, 0x6586947d, 0x65878e21, 0x65c2880e, ++ 0x04ddb2d3, 0x65c2a5f1, 0x65c0b088, 0x65c1b3a5, ++ 0x65cda26b, 0x65c1938a, 0x65eb0ded, 0x65af3e86, ++ 0x65a749be, 0x65f379d6, 0x04404f3e, 0x04c16b0a, ++ 0x04363226, 0x04b1312a, 0x04753182, 0x049a39cf, ++ 0x04d82ce9, 0x0459353e, 0x04883347, 0x048a2fb4, ++ 0x65872e1c, 0x65c62d26, 0x6598346a, 0x04013915, - print - print "/*", + }; + // END Generated code -- do not edit diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp -index 6f4e75ff4..dc2d5e2c9 100644 +index 8f0d7f5..13daa4e 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp -@@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21); +@@ -152,6 +152,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21); // Java stack pointer REGISTER_DECLARATION(Register, esp, r20); @@ -5420,7 +5421,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 #define assert_cond(ARG1) assert(ARG1, #ARG1) namespace asm_util { -@@ -562,6 +565,18 @@ class Address { +@@ -581,6 +584,18 @@ class Address { void lea(MacroAssembler *, Register) const; static bool offset_ok_for_immed(int64_t offset, uint shift = 0); @@ -5439,7 +5440,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 }; // Convience classes -@@ -2445,13 +2460,18 @@ public: +@@ -2473,13 +2488,18 @@ public: f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); } @@ -5463,7 +5464,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 #define INSN(NAME, opc, opc2, isSHR) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ starti; \ -@@ -2693,6 +2713,240 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V +@@ -2721,6 +2741,240 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V f(0, 10), rf(Vn, 5), rf(Vd, 0); } @@ -5704,7 +5705,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 // SVE inc/dec register by element count #define INSN(NAME, op) \ void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \ -@@ 
-2706,6 +2960,45 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V +@@ -2734,6 +2988,45 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V INSN(sve_dec, 1); #undef INSN @@ -5751,7 +5752,7 @@ index 6f4e75ff4..dc2d5e2c9 100644 } diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp -index 6ac54f257..a258528ea 100644 +index 6ac54f2..a258528 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp @@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z @@ -5769,10 +5770,10 @@ index 6ac54f257..a258528ea 100644 __ b(*stub->continuation()); } diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -index 265cd0888..590fd8662 100644 +index c70d424..7cfa70a 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp -@@ -2110,8 +2110,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { +@@ -2131,8 +2131,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { } // Push lots of registers in the bit set supplied. Don't push sp. @@ -5791,7 +5792,7 @@ index 265cd0888..590fd8662 100644 // Scan bitset to accumulate register pairs unsigned char regs[32]; int count = 0; -@@ -2126,8 +2135,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { +@@ -2147,8 +2156,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { return 0; } @@ -5810,7 +5811,7 @@ index 265cd0888..590fd8662 100644 if (count & 1) { strq(as_FloatRegister(regs[0]), Address(stack)); i += 1; -@@ -2140,7 +2159,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { +@@ -2161,7 +2180,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { return count; } @@ -5827,7 +5828,7 @@ index 265cd0888..590fd8662 100644 // Scan bitset to accumulate register pairs unsigned char regs[32]; int count = 0; -@@ -2155,6 +2183,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { +@@ -2176,6 +2204,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { return 0; } @@ -5844,7 +5845,7 @@ index 265cd0888..590fd8662 100644 if (count & 1) { ldrq(as_FloatRegister(regs[0]), Address(stack)); i += 1; -@@ -2638,23 +2676,39 @@ void MacroAssembler::pop_call_clobbered_registers() { +@@ -2659,23 +2697,39 @@ void MacroAssembler::pop_call_clobbered_registers() { pop(call_clobbered_registers() - RegSet::of(rscratch1, rscratch2), sp); } @@ -5894,10 +5895,10 @@ index 265cd0888..590fd8662 100644 + ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), + as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); + } - + // integer registers except lr & sp pop(RegSet::range(r0, r17), sp); -@@ -2703,6 +2757,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp) +@@ -2732,6 +2786,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp) return Address(base, offset); } @@ -5919,7 +5920,7 @@ index 265cd0888..590fd8662 100644 // Checks whether offset is aligned. // Returns true if it is, else false. 
bool MacroAssembler::merge_alignment_check(Register base, -@@ -5901,3 +5970,13 @@ void MacroAssembler::verify_sve_vector_length() { +@@ -5930,3 +5999,13 @@ void MacroAssembler::verify_sve_vector_length() { stop("Error: SVE vector length has changed since jvm startup"); bind(verify_ok); } @@ -5934,42 +5935,42 @@ index 265cd0888..590fd8662 100644 + bind(verify_ok); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp -index 7fd9e3c97..bc3175b2b 100644 +index ec9b3cc..07e3169 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -862,8 +862,10 @@ public: - + DEBUG_ONLY(void verify_heapbase(const char* msg);) - + - void push_CPU_state(bool save_vectors = false); - void pop_CPU_state(bool restore_vectors = false) ; + void push_CPU_state(bool save_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); + void pop_CPU_state(bool restore_vectors = false, bool use_sve = false, + int sve_vector_size_in_bytes = 0); - + // Round up to a power of two void round_to(Register reg, int modulus); @@ -939,6 +941,10 @@ public: Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); - + void verify_sve_vector_length(); + void reinitialize_ptrue() { + sve_ptrue(ptrue, B); + } + void verify_ptrue(); - + // Debugging - -@@ -1320,6 +1326,7 @@ private: + +@@ -1338,6 +1344,7 @@ private: // Returns an address on the stack which is reachable with a ldr/str of size // Uses rscratch2 if the address is not directly reachable Address spill_address(int size, int offset, Register tmp=rscratch2); + Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2); - + bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const; - -@@ -1343,6 +1350,9 @@ public: + +@@ -1361,6 +1368,9 @@ public: void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { str(Vx, T, spill_address(1 << (int)T, offset)); } @@ -5979,7 +5980,7 @@ index 7fd9e3c97..bc3175b2b 100644 void unspill(Register Rx, bool is64, int offset) { if (is64) { ldr(Rx, spill_address(8, offset)); -@@ -1353,6 +1363,9 @@ public: +@@ -1371,6 +1381,9 @@ public: void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { ldr(Vx, T, spill_address(1 << (int)T, offset)); } @@ -5989,7 +5990,7 @@ index 7fd9e3c97..bc3175b2b 100644 void spill_copy128(int src_offset, int dst_offset, Register tmp1=rscratch1, Register tmp2=rscratch2) { if (src_offset < 512 && (src_offset & 7) == 0 && -@@ -1366,6 +1379,15 @@ public: +@@ -1384,6 +1397,15 @@ public: spill(tmp1, true, dst_offset+8); } } @@ -6006,7 +6007,7 @@ index 7fd9e3c97..bc3175b2b 100644 #ifdef ASSERT diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp -index 15131ed32..e337f582a 100644 +index 1602a78..e476456 100644 --- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp @@ -196,3 +196,5 @@ REGISTER_DEFINITION(PRegister, p4); @@ -6016,10 +6017,10 @@ index 15131ed32..e337f582a 100644 + +REGISTER_DEFINITION(PRegister, ptrue); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp -index 3bf7284a7..6242cce08 100644 +index 4b35aa6..491e29d 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp -@@ -151,7 +151,7 @@ OopMap* 
RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ +@@ -152,7 +152,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ // Save Integer and Float registers. __ enter(); @@ -6028,7 +6029,7 @@ index 3bf7284a7..6242cce08 100644 // Set an oopmap for the call site. This oopmap will map all // oop-registers and debug-info registers as callee-saved. This -@@ -190,10 +190,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ +@@ -191,10 +191,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ } void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { @@ -6045,7 +6046,7 @@ index 3bf7284a7..6242cce08 100644 __ leave(); } -@@ -2786,6 +2791,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t +@@ -2810,6 +2815,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t __ maybe_isb(); __ membar(Assembler::LoadLoad | Assembler::LoadStore); @@ -6059,7 +6060,7 @@ index 3bf7284a7..6242cce08 100644 __ cbz(rscratch1, noException); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp -index 26a54c87e..85f64c007 100644 +index d307871..cd3f6f4 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator { @@ -6074,7 +6075,7 @@ index 26a54c87e..85f64c007 100644 // we should not really care that lr is no longer the callee // address. we saved the value the handler needs in r19 so we can // just copy it to r3. however, the C2 handler will push its own -@@ -5092,6 +5097,12 @@ class StubGenerator: public StubCodeGenerator { +@@ -5421,6 +5426,12 @@ class StubGenerator: public StubCodeGenerator { __ reset_last_Java_frame(true); __ maybe_isb(); @@ -6088,7 +6089,7 @@ index 26a54c87e..85f64c007 100644 // check for pending exceptions diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad -index 0ef4d7f3e..03248b2e0 100644 +index f142afa..98e6780 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6101,7 +6102,7 @@ index 0ef4d7f3e..03248b2e0 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad -index 7ee16a975..571a6aeb0 100644 +index b3bf64c..cc1e1a1 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6114,7 +6115,7 @@ index 7ee16a975..571a6aeb0 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad -index ea09aaafc..782c1c7c4 100644 +index ea09aaa..782c1c7 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6127,7 +6128,7 @@ index ea09aaafc..782c1c7c4 100644 // Identify extra cases that we might want to provide match rules for // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. 
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad -index a09c795c9..3b1b1046e 100644 +index a09c795..3b1b104 100644 --- a/src/hotspot/cpu/sparc/sparc.ad +++ b/src/hotspot/cpu/sparc/sparc.ad @@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { @@ -6140,10 +6141,10 @@ index a09c795c9..3b1b1046e 100644 // TODO // identify extra cases that we might want to provide match rules for diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad -index 76dd6addd..93aee6d6c 100644 +index 4e1336f..b75d0ff 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad -@@ -1354,7 +1354,7 @@ const bool Matcher::match_rule_supported(int opcode) { +@@ -1379,7 +1379,7 @@ const bool Matcher::match_rule_supported(int opcode) { return ret_value; // Per default match rules are supported. } @@ -6153,7 +6154,7 @@ index 76dd6addd..93aee6d6c 100644 // e.g. Op_ vector nodes and other intrinsics while guarding with vlen bool ret_value = match_rule_supported(opcode); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp -index ed890f88e..9a8307102 100644 +index ed890f8..9a83071 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -310,7 +310,7 @@ public: @@ -6166,7 +6167,7 @@ index ed890f88e..9a8307102 100644 // Some microarchitectures have mask registers used on vectors static const bool has_predicated_vectors(void); diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp -index 92f70b77d..ed67928f5 100644 +index fed52e4..ee58323 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -96,8 +96,11 @@ static const bool _do_vector_loop_experimental = false; // Experimental vectoriz @@ -6184,7 +6185,7 @@ index 92f70b77d..ed67928f5 100644 assert(lpt->_head->is_CountedLoop(), "must be"); CountedLoopNode *cl = lpt->_head->as_CountedLoop(); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp -index 1f2cf2c64..6867177c1 100644 +index 1f2cf2c..6867177 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1,5 +1,5 @@ @@ -6212,6 +6213,3 @@ index 1f2cf2c64..6867177c1 100644 } return false; } --- -2.19.1 - diff --git a/8268427-Improve-AlgorithmConstraints-checkAlgorithm-.patch b/8268427-Improve-AlgorithmConstraints-checkAlgorithm-.patch deleted file mode 100755 index 776c4e75397edbd72d5c3dd0c3148f5a6f7d1a73..0000000000000000000000000000000000000000 --- a/8268427-Improve-AlgorithmConstraints-checkAlgorithm-.patch +++ /dev/null @@ -1,242 +0,0 @@ -From e3d9485d01941cfbbe01dc8dcea7b913c2e8469d Mon Sep 17 00:00:00 2001 -From: chenshanyao -Date: Tue, 14 Sep 2021 11:43:18 +0800 -Subject: [PATCH 8/8] 8268427: Improve AlgorithmConstraints:checkAlgorithm - performance - -Summary: : performance -LLT: jdk_security -Patch Type: backport -Bug url: https://bugs.openjdk.java.net/browse/JDK-8268427 ---- - .../util/AbstractAlgorithmConstraints.java | 39 +++++------ - .../util/DisabledAlgorithmConstraints.java | 28 ++++---- - .../util/LegacyAlgorithmConstraints.java | 2 +- - .../security/AlgorithmConstraintsPermits.java | 66 +++++++++++++++++++ - 4 files changed, 95 insertions(+), 40 deletions(-) - create mode 100644 test/micro/org/openjdk/bench/java/security/AlgorithmConstraintsPermits.java - -diff --git a/src/java.base/share/classes/sun/security/util/AbstractAlgorithmConstraints.java 
b/src/java.base/share/classes/sun/security/util/AbstractAlgorithmConstraints.java -index 8d8c5d6fe..3f5678950 100644 ---- a/src/java.base/share/classes/sun/security/util/AbstractAlgorithmConstraints.java -+++ b/src/java.base/share/classes/sun/security/util/AbstractAlgorithmConstraints.java -@@ -32,6 +32,7 @@ import java.security.Security; - import java.util.ArrayList; - import java.util.Arrays; - import java.util.Collections; -+import java.util.TreeSet; - import java.util.List; - import java.util.Set; - -@@ -48,7 +49,7 @@ public abstract class AbstractAlgorithmConstraints - } - - // Get algorithm constraints from the specified security property. -- static List getAlgorithms(String propertyName) { -+ static Set getAlgorithms(String propertyName) { - String property = AccessController.doPrivileged( - new PrivilegedAction() { - @Override -@@ -72,38 +73,30 @@ public abstract class AbstractAlgorithmConstraints - - // map the disabled algorithms - if (algorithmsInProperty == null) { -- return Collections.emptyList(); -+ return Collections.emptySet(); - } -- return new ArrayList<>(Arrays.asList(algorithmsInProperty)); -+ Set algorithmsInPropertySet = new TreeSet<>(String.CASE_INSENSITIVE_ORDER); -+ algorithmsInPropertySet.addAll(Arrays.asList(algorithmsInProperty)); -+ return algorithmsInPropertySet; - } - -- static boolean checkAlgorithm(List algorithms, String algorithm, -+ static boolean checkAlgorithm(Set algorithms, String algorithm, - AlgorithmDecomposer decomposer) { - if (algorithm == null || algorithm.isEmpty()) { - throw new IllegalArgumentException("No algorithm name specified"); - } - -- Set elements = null; -- for (String item : algorithms) { -- if (item == null || item.isEmpty()) { -- continue; -- } -- -- // check the full name -- if (item.equalsIgnoreCase(algorithm)) { -- return false; -- } -+ if (algorithms.contains(algorithm)) { -+ return false; -+ } - -- // decompose the algorithm into sub-elements -- if (elements == null) { -- elements = decomposer.decompose(algorithm); -- } -+ // decompose the algorithm into sub-elements -+ Set elements = decomposer.decompose(algorithm); - -- // check the items of the algorithm -- for (String element : elements) { -- if (item.equalsIgnoreCase(element)) { -- return false; -- } -+ // check the element of the elements -+ for (String element : elements) { -+ if (algorithms.contains(element)) { -+ return false; - } - } - -diff --git a/src/java.base/share/classes/sun/security/util/DisabledAlgorithmConstraints.java b/src/java.base/share/classes/sun/security/util/DisabledAlgorithmConstraints.java -index 3ee431e62..efc6d339f 100644 ---- a/src/java.base/share/classes/sun/security/util/DisabledAlgorithmConstraints.java -+++ b/src/java.base/share/classes/sun/security/util/DisabledAlgorithmConstraints.java -@@ -85,6 +85,9 @@ public class DisabledAlgorithmConstraints extends AbstractAlgorithmConstraints { - private static final String PROPERTY_DISABLED_EC_CURVES = - "jdk.disabled.namedCurves"; - -+ private static final Pattern INCLUDE_PATTERN = Pattern.compile("include " + -+ PROPERTY_DISABLED_EC_CURVES, Pattern.CASE_INSENSITIVE); -+ - private static class CertPathHolder { - static final DisabledAlgorithmConstraints CONSTRAINTS = - new DisabledAlgorithmConstraints(PROPERTY_CERTPATH_DISABLED_ALGS); -@@ -95,7 +98,7 @@ public class DisabledAlgorithmConstraints extends AbstractAlgorithmConstraints { - new DisabledAlgorithmConstraints(PROPERTY_JAR_DISABLED_ALGS); - } - -- private final List disabledAlgorithms; -+ private final Set disabledAlgorithms; - private 
final Constraints algorithmConstraints; - - public static DisabledAlgorithmConstraints certPathConstraints() { -@@ -130,21 +133,14 @@ public class DisabledAlgorithmConstraints extends AbstractAlgorithmConstraints { - disabledAlgorithms = getAlgorithms(propertyName); - - // Check for alias -- int ecindex = -1, i = 0; - for (String s : disabledAlgorithms) { -- if (s.regionMatches(true, 0,"include ", 0, 8)) { -- if (s.regionMatches(true, 8, PROPERTY_DISABLED_EC_CURVES, 0, -- PROPERTY_DISABLED_EC_CURVES.length())) { -- ecindex = i; -- break; -- } -+ Matcher matcher = INCLUDE_PATTERN.matcher(s); -+ if (matcher.matches()) { -+ disabledAlgorithms.remove(matcher.group()); -+ disabledAlgorithms.addAll( -+ getAlgorithms(PROPERTY_DISABLED_EC_CURVES)); -+ break; - } -- i++; -- } -- if (ecindex > -1) { -- disabledAlgorithms.remove(ecindex); -- disabledAlgorithms.addAll(ecindex, -- getAlgorithms(PROPERTY_DISABLED_EC_CURVES)); - } - algorithmConstraints = new Constraints(propertyName, disabledAlgorithms); - } -@@ -323,8 +319,8 @@ public class DisabledAlgorithmConstraints extends AbstractAlgorithmConstraints { - "denyAfter\\s+(\\d{4})-(\\d{2})-(\\d{2})"); - } - -- public Constraints(String propertyName, List constraintArray) { -- for (String constraintEntry : constraintArray) { -+ public Constraints(String propertyName, Set constraintSet) { -+ for (String constraintEntry : constraintSet) { - if (constraintEntry == null || constraintEntry.isEmpty()) { - continue; - } -diff --git a/src/java.base/share/classes/sun/security/util/LegacyAlgorithmConstraints.java b/src/java.base/share/classes/sun/security/util/LegacyAlgorithmConstraints.java -index e4e5cedc1..550173080 100644 ---- a/src/java.base/share/classes/sun/security/util/LegacyAlgorithmConstraints.java -+++ b/src/java.base/share/classes/sun/security/util/LegacyAlgorithmConstraints.java -@@ -40,7 +40,7 @@ public class LegacyAlgorithmConstraints extends AbstractAlgorithmConstraints { - public static final String PROPERTY_TLS_LEGACY_ALGS = - "jdk.tls.legacyAlgorithms"; - -- private final List legacyAlgorithms; -+ private final Set legacyAlgorithms; - - public LegacyAlgorithmConstraints(String propertyName, - AlgorithmDecomposer decomposer) { -diff --git a/test/micro/org/openjdk/bench/java/security/AlgorithmConstraintsPermits.java b/test/micro/org/openjdk/bench/java/security/AlgorithmConstraintsPermits.java -new file mode 100644 -index 000000000..3cb9567b9 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/java/security/AlgorithmConstraintsPermits.java -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ */ -+package org.openjdk.bench.java.security; -+ -+import org.openjdk.jmh.annotations.Benchmark; -+import org.openjdk.jmh.annotations.BenchmarkMode; -+import org.openjdk.jmh.annotations.Fork; -+import org.openjdk.jmh.annotations.Mode; -+import org.openjdk.jmh.annotations.OutputTimeUnit; -+import org.openjdk.jmh.annotations.Param; -+import org.openjdk.jmh.annotations.Scope; -+import org.openjdk.jmh.annotations.Setup; -+import org.openjdk.jmh.annotations.State; -+import sun.security.util.DisabledAlgorithmConstraints; -+ -+import java.security.AlgorithmConstraints; -+import java.security.CryptoPrimitive; -+import java.util.concurrent.TimeUnit; -+import java.util.EnumSet; -+import java.util.Set; -+ -+import static sun.security.util.DisabledAlgorithmConstraints.PROPERTY_TLS_DISABLED_ALGS; -+ -+@BenchmarkMode(Mode.AverageTime) -+@OutputTimeUnit(TimeUnit.NANOSECONDS) -+@Fork(jvmArgsAppend = {"--add-exports", "java.base/sun.security.util=ALL-UNNAMED"}) -+@State(Scope.Thread) -+public class AlgorithmConstraintsPermits { -+ -+ AlgorithmConstraints tlsDisabledAlgConstraints; -+ Set primitives = EnumSet.of(CryptoPrimitive.KEY_AGREEMENT); -+ -+ @Param({"SSLv3", "DES", "NULL", "TLS1.3"}) -+ String algorithm; -+ -+ @Setup -+ public void setup() { -+ tlsDisabledAlgConstraints = new DisabledAlgorithmConstraints(PROPERTY_TLS_DISABLED_ALGS); -+ } -+ -+ @Benchmark -+ public boolean permits() { -+ return tlsDisabledAlgConstraints.permits(primitives, algorithm, null); -+ } -+} -+ --- -2.22.0 - diff --git a/8295068-SSLEngine-throws-NPE-parsing-Certificate.patch b/8295068-SSLEngine-throws-NPE-parsing-Certificate.patch new file mode 100644 index 0000000000000000000000000000000000000000..c69ccf42d4c01393b6a84500efd504249c39a441 --- /dev/null +++ b/8295068-SSLEngine-throws-NPE-parsing-Certificate.patch @@ -0,0 +1,21 @@ +Subject: [PATCH] JDK-8295068: SSLEngine throws NPE parsing CertificateRequests + +--- + .../share/classes/sun/security/ssl/CertificateRequest.java | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/java.base/share/classes/sun/security/ssl/CertificateRequest.java b/src/java.base/share/classes/sun/security/ssl/CertificateRequest.java +index 8e8370ba7..504aefb1a 100644 +--- a/src/java.base/share/classes/sun/security/ssl/CertificateRequest.java ++++ b/src/java.base/share/classes/sun/security/ssl/CertificateRequest.java +@@ -135,7 +135,7 @@ final class CertificateRequest { + ArrayList keyTypes = new ArrayList<>(3); + for (byte id : ids) { + ClientCertificateType cct = ClientCertificateType.valueOf(id); +- if (cct.isAvailable) { ++ if (cct != null && cct.isAvailable) { + keyTypes.add(cct.keyAlgorithm); + } + } +-- + diff --git a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch index de07fd8e73579473af4c7060520a47bf3cb405e9..94c979bb53ea9b48f4c27f4b120076041fc824d9 100755 --- a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +++ b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch @@ -367,4 +367,4 @@ index 000000000..85b49171c --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ -+11.0.19.0.13 ++11.0.21.0.13 diff --git a/delete_expired_certificates.patch b/delete_expired_certificates.patch index 340c9a19032ab3ed94cb301f4c449734d2aff333..fac9c98573de95eb6307f286f851b81910d066f9 100644 --- a/delete_expired_certificates.patch +++ 
b/delete_expired_certificates.patch @@ -12,23 +12,23 @@ index 7f8bf9a66..000000000 -Subject Public Key Algorithm: 2048-bit RSA key -Version: 3 ------BEGIN CERTIFICATE----- --MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT --MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i --YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG --EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg --R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9 --9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq --fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv --iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU --1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+ --bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW --MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA --ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l --uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn --Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS --tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF --PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un --hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV +-MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT +-MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i +-YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG +-EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg +-R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9 +-9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq +-fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv +-iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU +-1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+ +-bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW +-MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA +-ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l +-uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn +-Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS +-tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF +-PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un +-hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV -5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw== ------END CERTIFICATE----- diff --git a/make/data/cacerts/luxtrustglobalrootca b/make/data/cacerts/luxtrustglobalrootca @@ -45,24 +45,24 @@ index 7fb3d818f..000000000 -Subject Public Key Algorithm: 2048-bit RSA key -Version: 3 ------BEGIN CERTIFICATE----- --MIIDZDCCAkygAwIBAgICC7gwDQYJKoZIhvcNAQELBQAwRDELMAkGA1UEBhMCTFUx --FjAUBgNVBAoTDUx1eFRydXN0IHMuYS4xHTAbBgNVBAMTFEx1eFRydXN0IEdsb2Jh --bCBSb290MB4XDTExMDMxNzA5NTEzN1oXDTIxMDMxNzA5NTEzN1owRDELMAkGA1UE --BhMCTFUxFjAUBgNVBAoTDUx1eFRydXN0IHMuYS4xHTAbBgNVBAMTFEx1eFRydXN0 --IEdsb2JhbCBSb290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsn+n --QPAiygz267Hxyw6VV0B1r6A/Ps7sqjJX5hmxZ0OYWmt8s7j6eJyqpoSyYBuAQc5j --zR8XCJmk9e8+EsdMsFeaXHhAePxFjdqRZ9w6Ubltc+a3OY52OrQfBfVpVfmTz3iI --Sr6qm9d7R1tGBEyCFqY19vx039a0r9jitScRdFmiwmYsaArhmIiIPIoFdRTjuK7z --CISbasE/MRivJ6VLm6T9eTHemD0OYcqHmMH4ijCc+j4z1aXEAwfh95Z0GAAnOCfR --K6qq4UFFi2/xJcLcopeVx0IUM115hCNq52XAV6DYXaljAeew5Ivo+MVjuOVsdJA9 --x3f8K7p56aTGEnin/wIDAQABo2AwXjAMBgNVHRMEBTADAQH/MA4GA1UdDwEB/wQE 
--AwIBBjAfBgNVHSMEGDAWgBQXFYWJCS8kh28/HRvk8pZ5g0gTzjAdBgNVHQ4EFgQU --FxWFiQkvJIdvPx0b5PKWeYNIE84wDQYJKoZIhvcNAQELBQADggEBAFrwHNDUUM9B --fua4nX3DcNBeNv9ujnov3kgR1TQuPLdFwlQlp+HBHjeDtpSutkVIA+qVvuucarQ3 --XB8u02uCgUNbCj8RVWOs+nwIAjegPDkEM/6XMshS5dklTbDG7mgfcKpzzlcD3H0K --DTPy0lrfCmw7zBFRlxqkIaKFNQLXgCLShLL4wKpov9XrqsMLq6F8K/f1O4fhVFfs --BSTveUJO84ton+Ruy4KZycwq3FPCH3CDqyEPVrRI/98HIrOM+R2mBN8tAza53W/+ --MYhm/2xtRDSvCHc+JtJy9LtHVpM8mGPhM7uZI5K1g3noHZ9nrWLWidb2/CfeMifL +-MIIDZDCCAkygAwIBAgICC7gwDQYJKoZIhvcNAQELBQAwRDELMAkGA1UEBhMCTFUx +-FjAUBgNVBAoTDUx1eFRydXN0IHMuYS4xHTAbBgNVBAMTFEx1eFRydXN0IEdsb2Jh +-bCBSb290MB4XDTExMDMxNzA5NTEzN1oXDTIxMDMxNzA5NTEzN1owRDELMAkGA1UE +-BhMCTFUxFjAUBgNVBAoTDUx1eFRydXN0IHMuYS4xHTAbBgNVBAMTFEx1eFRydXN0 +-IEdsb2JhbCBSb290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsn+n +-QPAiygz267Hxyw6VV0B1r6A/Ps7sqjJX5hmxZ0OYWmt8s7j6eJyqpoSyYBuAQc5j +-zR8XCJmk9e8+EsdMsFeaXHhAePxFjdqRZ9w6Ubltc+a3OY52OrQfBfVpVfmTz3iI +-Sr6qm9d7R1tGBEyCFqY19vx039a0r9jitScRdFmiwmYsaArhmIiIPIoFdRTjuK7z +-CISbasE/MRivJ6VLm6T9eTHemD0OYcqHmMH4ijCc+j4z1aXEAwfh95Z0GAAnOCfR +-K6qq4UFFi2/xJcLcopeVx0IUM115hCNq52XAV6DYXaljAeew5Ivo+MVjuOVsdJA9 +-x3f8K7p56aTGEnin/wIDAQABo2AwXjAMBgNVHRMEBTADAQH/MA4GA1UdDwEB/wQE +-AwIBBjAfBgNVHSMEGDAWgBQXFYWJCS8kh28/HRvk8pZ5g0gTzjAdBgNVHQ4EFgQU +-FxWFiQkvJIdvPx0b5PKWeYNIE84wDQYJKoZIhvcNAQELBQADggEBAFrwHNDUUM9B +-fua4nX3DcNBeNv9ujnov3kgR1TQuPLdFwlQlp+HBHjeDtpSutkVIA+qVvuucarQ3 +-XB8u02uCgUNbCj8RVWOs+nwIAjegPDkEM/6XMshS5dklTbDG7mgfcKpzzlcD3H0K +-DTPy0lrfCmw7zBFRlxqkIaKFNQLXgCLShLL4wKpov9XrqsMLq6F8K/f1O4fhVFfs +-BSTveUJO84ton+Ruy4KZycwq3FPCH3CDqyEPVrRI/98HIrOM+R2mBN8tAza53W/+ +-MYhm/2xtRDSvCHc+JtJy9LtHVpM8mGPhM7uZI5K1g3noHZ9nrWLWidb2/CfeMifL -hNp3hSGhEiE= ------END CERTIFICATE----- diff --git a/make/data/cacerts/quovadisrootca b/make/data/cacerts/quovadisrootca @@ -79,58 +79,58 @@ index 0c195ff51..000000000 -Subject Public Key Algorithm: 2048-bit RSA key -Version: 3 ------BEGIN CERTIFICATE----- --MIIF0DCCBLigAwIBAgIEOrZQizANBgkqhkiG9w0BAQUFADB/MQswCQYDVQQGEwJC --TTEZMBcGA1UEChMQUXVvVmFkaXMgTGltaXRlZDElMCMGA1UECxMcUm9vdCBDZXJ0 --aWZpY2F0aW9uIEF1dGhvcml0eTEuMCwGA1UEAxMlUXVvVmFkaXMgUm9vdCBDZXJ0 --aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wMTAzMTkxODMzMzNaFw0yMTAzMTcxODMz --MzNaMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1pdGVkMSUw --IwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYDVQQDEyVR --dW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG --9w0BAQEFAAOCAQ8AMIIBCgKCAQEAv2G1lVO6V/z68mcLOhrfEYBklbTRvM16z/Yp --li4kVEAkOPcahdxYTMukJ0KX0J+DisPkBgNbAKVRHnAEdOLB1Dqr1607BxgFjv2D --rOpm2RgbaIr1VxqYuvXtdj182d6UajtLF8HVj71lODqV0D1VNk7feVcxKh7YWWVJ --WCCYfqtffp/p1k3sg3Spx2zY7ilKhSoGFPlU5tPaZQeLYzcS19Dsw3sgQUSj7cug --F+FxZc4dZjH3dgEZyH0DWLaVSR2mEiboxgx24ONmy+pdpibu5cxfvWenAScOospU --xbF6lR1xHkopigPcakXBpBlebzbNw6Kwt/5cOOJSvPhEQ+aQuwIDAQABo4ICUjCC --Ak4wPQYIKwYBBQUHAQEEMTAvMC0GCCsGAQUFBzABhiFodHRwczovL29jc3AucXVv --dmFkaXNvZmZzaG9yZS5jb20wDwYDVR0TAQH/BAUwAwEB/zCCARoGA1UdIASCAREw --ggENMIIBCQYJKwYBBAG+WAABMIH7MIHUBggrBgEFBQcCAjCBxxqBxFJlbGlhbmNl --IG9uIHRoZSBRdW9WYWRpcyBSb290IENlcnRpZmljYXRlIGJ5IGFueSBwYXJ0eSBh --c3N1bWVzIGFjY2VwdGFuY2Ugb2YgdGhlIHRoZW4gYXBwbGljYWJsZSBzdGFuZGFy --ZCB0ZXJtcyBhbmQgY29uZGl0aW9ucyBvZiB1c2UsIGNlcnRpZmljYXRpb24gcHJh --Y3RpY2VzLCBhbmQgdGhlIFF1b1ZhZGlzIENlcnRpZmljYXRlIFBvbGljeS4wIgYI --KwYBBQUHAgEWFmh0dHA6Ly93d3cucXVvdmFkaXMuYm0wHQYDVR0OBBYEFItLbe3T --KbkGGew5Oanwl4Rqy+/fMIGuBgNVHSMEgaYwgaOAFItLbe3TKbkGGew5Oanwl4Rq --y+/foYGEpIGBMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1p --dGVkMSUwIwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYD 
--VQQDEyVRdW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5ggQ6tlCL --MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOCAQEAitQUtf70mpKnGdSk --fnIYj9lofFIk3WdvOXrEql494liwTXCYhGHoG+NpGA7O+0dQoE7/8CQfvbLO9Sf8 --7C9TqnN7Az10buYWnuulLsS/VidQK2K6vkscPFVcQR0kvoIgR13VRH56FmjffU1R --cHhXHTMe/QKZnAzNCgVPx7uOpHX6Sm2xgI4JVrmcGmD+XcHXetwReNDWXcG31a0y --mQM6isxUJTkxgXsTIlG6Rmyhu576BGxJJnSP0nPrzDCi5upZIof4l/UO/erMkqQW --xFIY6iHOsfHmhIHluqmGKPJDWl0Snawe2ajlCmqnf6CHKc/yiU3U7MXi5nrQNiOK +-MIIF0DCCBLigAwIBAgIEOrZQizANBgkqhkiG9w0BAQUFADB/MQswCQYDVQQGEwJC +-TTEZMBcGA1UEChMQUXVvVmFkaXMgTGltaXRlZDElMCMGA1UECxMcUm9vdCBDZXJ0 +-aWZpY2F0aW9uIEF1dGhvcml0eTEuMCwGA1UEAxMlUXVvVmFkaXMgUm9vdCBDZXJ0 +-aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wMTAzMTkxODMzMzNaFw0yMTAzMTcxODMz +-MzNaMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1pdGVkMSUw +-IwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYDVQQDEyVR +-dW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIIBIjANBgkqhkiG +-9w0BAQEFAAOCAQ8AMIIBCgKCAQEAv2G1lVO6V/z68mcLOhrfEYBklbTRvM16z/Yp +-li4kVEAkOPcahdxYTMukJ0KX0J+DisPkBgNbAKVRHnAEdOLB1Dqr1607BxgFjv2D +-rOpm2RgbaIr1VxqYuvXtdj182d6UajtLF8HVj71lODqV0D1VNk7feVcxKh7YWWVJ +-WCCYfqtffp/p1k3sg3Spx2zY7ilKhSoGFPlU5tPaZQeLYzcS19Dsw3sgQUSj7cug +-F+FxZc4dZjH3dgEZyH0DWLaVSR2mEiboxgx24ONmy+pdpibu5cxfvWenAScOospU +-xbF6lR1xHkopigPcakXBpBlebzbNw6Kwt/5cOOJSvPhEQ+aQuwIDAQABo4ICUjCC +-Ak4wPQYIKwYBBQUHAQEEMTAvMC0GCCsGAQUFBzABhiFodHRwczovL29jc3AucXVv +-dmFkaXNvZmZzaG9yZS5jb20wDwYDVR0TAQH/BAUwAwEB/zCCARoGA1UdIASCAREw +-ggENMIIBCQYJKwYBBAG+WAABMIH7MIHUBggrBgEFBQcCAjCBxxqBxFJlbGlhbmNl +-IG9uIHRoZSBRdW9WYWRpcyBSb290IENlcnRpZmljYXRlIGJ5IGFueSBwYXJ0eSBh +-c3N1bWVzIGFjY2VwdGFuY2Ugb2YgdGhlIHRoZW4gYXBwbGljYWJsZSBzdGFuZGFy +-ZCB0ZXJtcyBhbmQgY29uZGl0aW9ucyBvZiB1c2UsIGNlcnRpZmljYXRpb24gcHJh +-Y3RpY2VzLCBhbmQgdGhlIFF1b1ZhZGlzIENlcnRpZmljYXRlIFBvbGljeS4wIgYI +-KwYBBQUHAgEWFmh0dHA6Ly93d3cucXVvdmFkaXMuYm0wHQYDVR0OBBYEFItLbe3T +-KbkGGew5Oanwl4Rqy+/fMIGuBgNVHSMEgaYwgaOAFItLbe3TKbkGGew5Oanwl4Rq +-y+/foYGEpIGBMH8xCzAJBgNVBAYTAkJNMRkwFwYDVQQKExBRdW9WYWRpcyBMaW1p +-dGVkMSUwIwYDVQQLExxSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MS4wLAYD +-VQQDEyVRdW9WYWRpcyBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5ggQ6tlCL +-MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOCAQEAitQUtf70mpKnGdSk +-fnIYj9lofFIk3WdvOXrEql494liwTXCYhGHoG+NpGA7O+0dQoE7/8CQfvbLO9Sf8 +-7C9TqnN7Az10buYWnuulLsS/VidQK2K6vkscPFVcQR0kvoIgR13VRH56FmjffU1R +-cHhXHTMe/QKZnAzNCgVPx7uOpHX6Sm2xgI4JVrmcGmD+XcHXetwReNDWXcG31a0y +-mQM6isxUJTkxgXsTIlG6Rmyhu576BGxJJnSP0nPrzDCi5upZIof4l/UO/erMkqQW +-xFIY6iHOsfHmhIHluqmGKPJDWl0Snawe2ajlCmqnf6CHKc/yiU3U7MXi5nrQNiOK -SnQ2+Q== ------END CERTIFICATE----- diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java index 122a01901..c131bd493 100644 --- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java +++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ -53,12 +53,12 @@ public class VerifyCACerts { +@@ -47,12 +47,12 @@ public class VerifyCACerts { + File.separator + "security" + File.separator + "cacerts"; - + // The numbers of certs now. 
-- private static final int COUNT = 90; -+ private static final int COUNT = 87; - +- private static final int COUNT = 97; ++ private static final int COUNT = 94; + // SHA-256 of cacerts, can be generated with // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 private static final String CHECKSUM -- = "21:8C:35:29:4C:E2:49:D2:83:30:DF:8B:5E:39:F8:8C:D6:C5:2B:59:05:32:74:E5:79:A5:91:9F:3C:57:B9:E3"; -+ = "D5:5B:7A:BD:8F:4A:DA:19:75:90:28:61:E7:40:6D:A2:54:F5:64:C0:F0:30:29:16:FB:46:9B:57:D5:F7:04:D7"; - +- = "88:72:92:56:FF:E5:A3:E4:39:98:6D:18:0B:BA:CC:0B:66:CB:1D:6D:52:CE:D7:C8:AD:63:B7:F1:5F:02:24:52"; ++ = "F3:7B:BE:30:2F:68:F9:75:82:21:0B:81:E1:26:32:4C:C0:00:32:B0:66:70:8E:A7:36:65:E8:86:6A:72:4E:F8"; // map of cert alias to SHA-256 fingerprint @SuppressWarnings("serial") + private static final Map FINGERPRINT_MAP = new HashMap<>() { @@ -116,8 +116,6 @@ public class VerifyCACerts { "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); put("digicerthighassuranceevrootca [jdk]", @@ -163,4 +163,4 @@ index 122a01901..c131bd493 100644 - add("geotrustglobalca [jdk]"); } }; - + diff --git a/fix_X509TrustManagerImpl_symantec_distrust.patch b/fix_X509TrustManagerImpl_symantec_distrust.patch deleted file mode 100644 index 570e0666d66509e55e529bd7fe86ea0ef06311c3..0000000000000000000000000000000000000000 --- a/fix_X509TrustManagerImpl_symantec_distrust.patch +++ /dev/null @@ -1,72 +0,0 @@ -diff --git a/make/data/cacerts/geotrustglobalca b/make/data/cacerts/geotrustglobalca -new file mode 100644 -index 000000000..7f8bf9a66 ---- /dev/null -+++ b/make/data/cacerts/geotrustglobalca -@@ -0,0 +1,27 @@ -+Owner: CN=GeoTrust Global CA, O=GeoTrust Inc., C=US -+Issuer: CN=GeoTrust Global CA, O=GeoTrust Inc., C=US -+Serial number: 23456 -+Valid from: Tue May 21 04:00:00 GMT 2002 until: Sat May 21 04:00:00 GMT 2022 -+Signature algorithm name: SHA1withRSA -+Subject Public Key Algorithm: 2048-bit RSA key -+Version: 3 -+-----BEGIN CERTIFICATE----- -+MIIDVDCCAjygAwIBAgIDAjRWMA0GCSqGSIb3DQEBBQUAMEIxCzAJBgNVBAYTAlVT -+MRYwFAYDVQQKEw1HZW9UcnVzdCBJbmMuMRswGQYDVQQDExJHZW9UcnVzdCBHbG9i -+YWwgQ0EwHhcNMDIwNTIxMDQwMDAwWhcNMjIwNTIxMDQwMDAwWjBCMQswCQYDVQQG -+EwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UEAxMSR2VvVHJ1c3Qg -+R2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2swYYzD9 -+9BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9mOSm9BXiLnTjoBbdq -+fnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIuT8rxh0PBFpVXLVDv -+iS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6cJmTM386DGXHKTubU -+1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmRCw7+OC7RHQWa9k0+ -+bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5aszPeE4uwc2hGKceeoW -+MPRfwCvocWvk+QIDAQABo1MwUTAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTA -+ephojYn7qwVkDBF9qn1luMrMTjAfBgNVHSMEGDAWgBTAephojYn7qwVkDBF9qn1l -+uMrMTjANBgkqhkiG9w0BAQUFAAOCAQEANeMpauUvXVSOKVCUn5kaFOSPeCpilKIn -+Z57QzxpeR+nBsqTP3UEaBU6bS+5Kb1VSsyShNwrrZHYqLizz/Tt1kL/6cdjHPTfS -+tQWVYrmm3ok9Nns4d0iXrKYgjy6myQzCsplFAMfOEVEiIuCl6rYVSAlk6l5PdPcF -+PseKUgzbFbS9bZvlxrFUaKnjaZC2mqUPuLk/IH2uSrW4nOQdtqvmlKXBx4Ot2/Un -+hw4EbNX/3aBd7YdStysVAq45pmp06drE57xNNB6pXE0zX5IJL4hmXXeXxx12E6nV -+5fEWCRE11azbJHFwLJhWC9kXtNHjUStedejV0NxPNO3CBWaAocvmMw== -+-----END CERTIFICATE----- -diff --git a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -index c131bd493..478cc7235 100644 ---- a/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -+++ b/test/jdk/sun/security/lib/cacerts/VerifyCACerts.java -@@ 
-53,12 +53,12 @@ public class VerifyCACerts { - + File.separator + "security" + File.separator + "cacerts"; - - // The numbers of certs now. -- private static final int COUNT = 86; -+ private static final int COUNT = 87; - - // SHA-256 of cacerts, can be generated with - // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 - private static final String CHECKSUM -- = "89:78:5A:96:F4:B2:68:4C:91:C0:32:2C:ED:2D:6B:3B:26:B8:37:C3:07:DD:9E:50:87:53:53:7A:24:98:97:E0"; -+ = "63:C4:11:7D:BF:C5:05:2B:BF:C2:B4:5A:2C:B6:26:C4:57:76:FB:D4:48:3B:E7:4C:62:B0:A1:7B:4F:07:B1:0C"; - - // map of cert alias to SHA-256 fingerprint - @SuppressWarnings("serial") -@@ -116,7 +116,9 @@ public class VerifyCACerts { - "7E:37:CB:8B:4C:47:09:0C:AB:36:55:1B:A6:F4:5D:B8:40:68:0F:BA:16:6A:95:2D:B1:00:71:7F:43:05:3F:C2"); - put("digicerthighassuranceevrootca [jdk]", - "74:31:E5:F4:C3:C1:CE:46:90:77:4F:0B:61:E0:54:40:88:3B:A9:A0:1E:D0:0B:A6:AB:D7:80:6E:D3:B1:18:CF"); -- put("geotrustprimaryca [jdk]", -+ put("geotrustglobalca [jdk]", -+ "FF:85:6A:2D:25:1D:CD:88:D3:66:56:F4:50:12:67:98:CF:AB:AA:DE:40:79:9C:72:2D:E4:D2:B5:DB:36:A7:3A"); -+ put("geotrustprimaryca [jdk]", - "37:D5:10:06:C5:12:EA:AB:62:64:21:F1:EC:8C:92:01:3F:C5:F8:2A:E9:8E:E5:33:EB:46:19:B8:DE:B4:D0:6C"); - put("geotrustprimarycag2 [jdk]", - "5E:DB:7A:C4:3B:82:A0:6A:87:61:E8:D7:BE:49:79:EB:F2:61:1F:7D:D7:9B:F9:1C:1C:6B:56:6A:21:9E:D7:66"); -@@ -250,6 +252,8 @@ public class VerifyCACerts { - add("addtrustexternalca [jdk]"); - // Valid until: Sat May 30 10:44:50 GMT 2020 - add("addtrustqualifiedca [jdk]"); -+ // Valid until: Sat May 21 04:00:00 GMT 2022 -+ add("geotrustglobalca [jdk]"); - } - }; - diff --git a/jdk-updates-jdk11u-jdk-11.0.21-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.21-ga.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..687ead166f590deed440470703b5139dafafc2f3 Binary files /dev/null and b/jdk-updates-jdk11u-jdk-11.0.21-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index d9e896d93f73b32179c0ac8c575d8843f794c1e3..577745d4fd4791270ef7cb15b53c0d237016af9c 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -54,6 +54,7 @@ %endif %global aarch64 aarch64 +%global riscv64 riscv64 # By default, we build a debug build during main build on JIT architectures %if %{with slowdebug} @@ -109,12 +110,15 @@ %ifarch %{aarch64} %global archinstall aarch64 %endif +%ifarch %{riscv64} +%global archinstall riscv64 +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 19 +%global securityver 21 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place @@ -130,12 +134,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 7 +%global buildver 9 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.19-ga +%global revision jdk-11.0.21-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to 1, so debug ones can have 0 @@ -284,7 +288,9 @@ ext=.gz alternatives \\ --install %{_bindir}/javac javac %{sdkbindir -- %{?1}}/javac $PRIORITY --family %{name}.%{_arch} \\ --slave %{_jvmdir}/java java_sdk %{_jvmdir}/%{sdkdir -- %{?1}} \\ +%ifarch %{aarch64} x86_64 --slave %{_bindir}/jaotc jaotc %{sdkbindir -- %{?1}}/jaotc \\ +%endif --slave %{_bindir}/jlink jlink %{sdkbindir -- %{?1}}/jlink \\ --slave 
%{_bindir}/jmod jmod %{sdkbindir -- %{?1}}/jmod \\ --slave %{_bindir}/jhsdb jhsdb %{sdkbindir -- %{?1}}/jhsdb \\ @@ -555,7 +561,9 @@ exit 0 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jstatd %{_jvmdir}/%{sdkdir -- %{?1}}/bin/rmic %{_jvmdir}/%{sdkdir -- %{?1}}/bin/serialver +%ifarch %{aarch64} x86_64 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jaotc +%endif %{_jvmdir}/%{sdkdir -- %{?1}}/include %{_jvmdir}/%{sdkdir -- %{?1}}/lib/ct.sym %if %{with_systemtap} @@ -740,7 +748,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 0 +Release: 1 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -845,7 +853,7 @@ Patch61: downgrade-the-symver-of-log2f-posix-spawn.patch Patch62: 8254078-DataOutputStream-is-very-slow-post-disabling.patch Patch65: add-LazyBox-feature.patch Patch66: add-G1-Full-GC-optimization.patch -Patch68: src-openeuler-openjdk-11-resolve-code-inconsistencies.patch +Patch68: src-openeuler-openjdk-11-resolve-code-inconsistencies.patch Patch69: G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch # 11.0.11 @@ -875,9 +883,18 @@ Patch89: downgrade-the-symver-of-memcpy-GLIBC_2.14-on-x86.patch # 11.0.16 Patch90: fix_Internal_and_external_code_inconsistency.patch -# 11.0.18 +# 11.0.18 Patch91: 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch +# 11.0.21 +Patch92: 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch +############################################ +# +# riscv64 specific patches +# +############################################ +Patch2000: 2000-Add-riscv64-support-based-on-bishengjdk-riscv-branch.patch + BuildRequires: elfutils-extra BuildRequires: autoconf BuildRequires: alsa-lib-devel @@ -1109,6 +1126,9 @@ fi pushd %{top_level_dir_name} # OpenJDK patches +%ifarch riscv64 +%patch2000 -p1 +%else %patch5 -p1 %patch6 -p1 %patch7 -p1 @@ -1168,6 +1188,8 @@ pushd %{top_level_dir_name} %patch89 -p1 %patch90 -p1 %patch91 -p1 +%patch92 -p1 +%endif popd # openjdk # %patch1000 @@ -1228,7 +1250,7 @@ export NUM_PROC=${NUM_PROC:-1} [ ${NUM_PROC} -gt %{?_smp_ncpus_max} ] && export NUM_PROC=%{?_smp_ncpus_max} %endif -%ifarch %{aarch64} +%ifarch %{aarch64} riscv64 export ARCH_DATA_MODEL=64 %endif @@ -1328,7 +1350,7 @@ export JAVA_HOME=$(pwd)/%{buildoutputdir -- $suffix}/images/%{jdkimage} # Check debug symbols are present and can identify code find "$JAVA_HOME" -iname '*.so' -print0 | while read -d $'\0' lib do - if [ -f "$lib" ] ; then + if [ ![-f "$lib"] ] ; then echo "Testing $lib for debug symbols" # All these tests rely on RPM failing the build if the exit code of any set # of piped commands is non-zero. @@ -1371,7 +1393,7 @@ done # Make sure gdb can do a backtrace based on line numbers on libjvm.so # javaCalls.cpp:58 should map to: -# http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/ff3b27e6bcc2/src/share/vm/runtime/javaCalls.cpp#l58 +# http://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/ff3b27e6bcc2/src/share/vm/runtime/javaCalls.cpp#l58 # Using line number 1 might cause build problems. 
See: gdb -q "$JAVA_HOME/bin/java" < - 1:11.0.19.7-0 +* Mon Oct 23 2023 DXwangg - 1:11.0.21.9-1 +- add 8295068-SSLEngine-throws-NPE-parsing-Certificate.patch + + +* Thu Oct 19 2023 DXwangg - 1:11.0.21.9-0 +- update to 11.0.21+9(GA) +- modified delete_expired_certificates.patch +- modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +- modified 8210473-JEP-345-NUMA-Aware-Memory-Allocation-for-G1.patch +- modified 8214527-AArch64-ZGC-for-Aarch64.patch + +* Thu Aug 17 2023 misaka00251 - 1:11.0.20.8-2 +- Add riscv64 support (based on bishengjdk riscv branch) + +* Wed Aug 2023 noah - 1:11.0.20.8-1 +- fix CPUBench kmeans random fails + +* Wed Jul 2023 DXwangg - 1:11.0.20.8-0 +- update to 11.0.20+8(GA) +- modified delete_expired_certificates.patch + + +* Thu Apr 2023 DXwangg - 1:11.0.19.7-0 - update to 11.0.19+7(GA) - deleted 8225648-TESTBUG-java-lang-annotation-loaderLeak-Main.patch - modified Add-KAE-implementation.patch - modified G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch - modified delete_expired_certificates.patch - modified 8205921-Optimizing-best_of_2-work-stealing-queue-selection.patch + +* Thu Jan 5 2023 Henry_Yang - 1:11.0.18.10-1 - add 8222289-Overhaul-logic-for-reading-writing-constant-pool-entries.patch -- modified 8224675-Late-GC-barrier-insertion-for-ZGC.patch + +* Thu Jan 5 2023 DXwangg - 1:11.0.18.10-0 +- update to 11.0.18+10(GA) - modified 8231441-2-AArch64-Initial-SVE-backend-support.patch -- deleted 8290705_fix_StringConcat_validate_mem_flow_asserts_with_unexpected_userStoreI.patch +- delete 8290705_fix_StringConcat_validate_mem_flow_asserts_with_unexpected_userStoreI.patch * Wed Oct 19 2022 DXwangg - 1:11.0.17.8-0 - update to 11.0.17+8(GA) @@ -1862,7 +1910,7 @@ cjc.mainProgram(arg) - add 8248336-AArch64-C2-offset-overflow-in-BoxLockNode-em.patch * Mon Oct 26 2020 noah - 1:11.0.9.11-1 -- add 8229495-SIGILL-in-C2-generated-OSR-compilation.patch +- add 8229495-SIGILL-in-C2-generated-OSR-compilation.patch * Thu Oct 22 2020 noah - 1:11.0.9.11-0 - Update to 11.0.9+11 (GA)